diff --git a/benchmarks/benchmarks/__init__.py b/benchmarks/benchmarks/__init__.py index ae0da7db2..1217c81ed 100644 --- a/benchmarks/benchmarks/__init__.py +++ b/benchmarks/benchmarks/__init__.py @@ -11,3 +11,5 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + +"""Benchmarks for PyMC.""" diff --git a/benchmarks/benchmarks/benchmarks.py b/benchmarks/benchmarks/benchmarks.py index 0cf4d5e36..7485cef65 100644 --- a/benchmarks/benchmarks/benchmarks.py +++ b/benchmarks/benchmarks/benchmarks.py @@ -24,7 +24,7 @@ def glm_hierarchical_model(random_seed=123): - """Sample glm hierarchical model to use in benchmarks""" + """Sample glm hierarchical model to use in benchmarks.""" np.random.seed(random_seed) data = pd.read_csv(pm.get_data("radon.csv")) data["log_radon"] = data["log_radon"].astype(pytensor.config.floatX) @@ -47,7 +47,7 @@ def glm_hierarchical_model(random_seed=123): def mixture_model(random_seed=1234): - """Sample mixture model to use in benchmarks""" + """Sample mixture model to use in benchmarks.""" np.random.seed(1234) size = 1000 w_true = np.array([0.35, 0.4, 0.25]) @@ -77,10 +77,7 @@ def mixture_model(random_seed=1234): class OverheadSuite: - """ - Just tests how long sampling from a normal distribution takes for various - samplers - """ + """Test how long sampling from a normal distribution takes for various samplers.""" params = [pm.NUTS, pm.HamiltonianMC, pm.Metropolis, pm.Slice] timer = timeit.default_timer @@ -161,7 +158,7 @@ def time_glm_hierarchical(self): class NUTSInitSuite: - """Tests initializations for NUTS sampler on models""" + """Tests initializations for NUTS sampler on models.""" timeout = 360.0 params = ("adapt_diag", "jitter+adapt_diag", "jitter+adapt_full", "adapt_full") diff --git a/docs/source/contributing/developer_guide.md b/docs/source/contributing/developer_guide.md index 257795b14..7820b1f43 100644 --- a/docs/source/contributing/developer_guide.md +++ b/docs/source/contributing/developer_guide.md @@ -34,7 +34,7 @@ $$ z \sim \text{Normal}(0, 5) $$ -A call to a {class}`~pymc.Distribution` constructor as shown above returns an PyTensor {class}`~pytensor.tensor.TensorVariable`, which is a symbolic representation of the model variable and the graph of inputs it depends on. +A call to a {class}`~pymc.Distribution` constructor as shown above returns a PyTensor {class}`~pytensor.tensor.TensorVariable`, which is a symbolic representation of the model variable and the graph of inputs it depends on. Under the hood, the variables are created through the {meth}`~pymc.Distribution.dist` API, which calls the {class}`~pytensor.tensor.random.basic.RandomVariable` {class}`~pytensor.graph.op.Op` corresponding to the distribution. At a high level of abstraction, the idea behind ``RandomVariable`` ``Op``s is to create symbolic variables (``TensorVariable``s) that can be associated with the properties of a probability distribution. @@ -134,7 +134,7 @@ model_logp # ==> -6.6973152 ## Behind the scenes of the ``logp`` function -The ``logp`` function is straightforward - it is an PyTensor function within each distribution. +The ``logp`` function is straightforward - it is a PyTensor function within each distribution. 
It has the following signature: :::{warning} @@ -277,7 +277,7 @@ as for ``FreeRV`` and ``ObservedRV``, they are ``TensorVariable``\s with ``Factor`` basically `enable and assign the logp `__ -(represented as a tensor also) property to an PyTensor tensor (thus +(represented as a tensor also) property to a PyTensor tensor (thus making it a random variable). For a ``TransformedRV``, it transforms the distribution into a ``TransformedDistribution``, and then ``model.Var`` is called again to added the RV associated with the @@ -373,7 +373,7 @@ def logpt(self): return logp ``` -which returns an PyTensor tensor that its value depends on the free parameters in the model (i.e., its parent nodes from the PyTensor graph). +which returns a PyTensor tensor that its value depends on the free parameters in the model (i.e., its parent nodes from the PyTensor graph). You can evaluate or compile into a python callable (that you can pass numpy as input args). Note that the logp tensor depends on its input in the PyTensor graph, thus you cannot pass new tensor to generate a logp function. For similar reason, in PyMC we do graph copying a lot using pytensor.clone_replace to replace the inputs to a tensor. @@ -561,7 +561,7 @@ Moreover, transition kernels in TFP do not flatten the tensors, see eg docstring We love NUTS, or to be more precise Dynamic HMC with complex stopping rules. This part is actually all done outside of PyTensor, for NUTS, it includes: The leapfrog, dual averaging, tuning of mass matrix and step size, the tree building, sampler related statistics like divergence and energy checking. -We actually have an PyTensor version of HMC, but it has never been used, and has been removed from the main repository. +We actually have a PyTensor version of HMC, but it has never been used, and has been removed from the main repository. It can still be found in the [git history](https://github.com/pymc-devs/pymc/pull/3734/commits/0fdae8207fd14f66635f3673ef267b2b8817aa68), though. #### Variational Inference (VI) diff --git a/docs/source/guides/Gaussian_Processes.rst b/docs/source/guides/Gaussian_Processes.rst index 3d1fbc80b..19f47c1f1 100644 --- a/docs/source/guides/Gaussian_Processes.rst +++ b/docs/source/guides/Gaussian_Processes.rst @@ -158,7 +158,7 @@ other type of random variable. The first argument is the name of the random variable representing the function we are placing the prior over. The second argument is the inputs to the function that the prior is over, :code:`X`. The inputs are usually known and present in the data, but they can -also be PyMC random variables. If the inputs are an PyTensor tensor or a +also be PyMC random variables. If the inputs are a PyTensor tensor or a PyMC random variable, the :code:`shape` needs to be given. Usually at this point, inference is performed on the model. The diff --git a/docs/source/learn/core_notebooks/Gaussian_Processes.rst b/docs/source/learn/core_notebooks/Gaussian_Processes.rst index f076a6f65..41cb5903c 100644 --- a/docs/source/learn/core_notebooks/Gaussian_Processes.rst +++ b/docs/source/learn/core_notebooks/Gaussian_Processes.rst @@ -155,7 +155,7 @@ other type of random variable. The first argument is the name of the random variable representing the function we are placing the prior over. The second argument is the inputs to the function that the prior is over, :code:`X`. The inputs are usually known and present in the data, but they can -also be PyMC random variables. If the inputs are an PyTensor tensor or a +also be PyMC random variables. 
If the inputs are a PyTensor tensor or a PyMC random variable, the :code:`shape` needs to be given. Usually at this point, inference is performed on the model. The diff --git a/docs/source/learn/core_notebooks/pymc_pytensor.ipynb b/docs/source/learn/core_notebooks/pymc_pytensor.ipynb index a5524fe9d..aad72316a 100644 --- a/docs/source/learn/core_notebooks/pymc_pytensor.ipynb +++ b/docs/source/learn/core_notebooks/pymc_pytensor.ipynb @@ -415,7 +415,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### What is in an PyTensor graph?\n", + "### What is in a PyTensor graph?\n", "\n", "The following diagram shows the basic structure of an `pytensor` graph.\n", "\n", diff --git a/pymc/__init__.py b/pymc/__init__.py index 83d147a3a..a828b7282 100644 --- a/pymc/__init__.py +++ b/pymc/__init__.py @@ -13,6 +13,8 @@ # limitations under the License. +"""PyMC: Bayesian Modeling and Probabilistic Programming in Python.""" + import logging _log = logging.getLogger(__name__) diff --git a/pymc/backends/__init__.py b/pymc/backends/__init__.py index 2f58b7ed8..fc1356b85 100644 --- a/pymc/backends/__init__.py +++ b/pymc/backends/__init__.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Storage backends for traces +"""Storage backends for traces. The NDArray (pymc.backends.NDArray) backend holds the entire trace in memory. @@ -101,7 +101,7 @@ def _init_trace( model: Model, trace_vars: list[TensorVariable] | None = None, ) -> BaseTrace: - """Initializes a trace backend for a chain.""" + """Initialize a trace backend for a chain.""" strace: BaseTrace if trace is None: strace = NDArray(model=model, vars=trace_vars) @@ -126,7 +126,7 @@ def init_traces( model: Model, trace_vars: list[TensorVariable] | None = None, ) -> tuple[RunType | None, Sequence[IBaseTrace]]: - """Initializes a trace recorder for each chain.""" + """Initialize a trace recorder for each chain.""" if HAS_MCB and isinstance(backend, Backend): return init_chain_adapters( backend=backend, diff --git a/pymc/backends/base.py b/pymc/backends/base.py index 6aefed81b..c0239f8de 100644 --- a/pymc/backends/base.py +++ b/pymc/backends/base.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Base backend for traces +"""Base backend for traces. See the docstring for pymc.backends for more information """ @@ -55,6 +55,7 @@ class IBaseTrace(ABC, Sized): """Sampler stats for each sampler.""" def __len__(self): + """Length of the chain.""" raise NotImplementedError() def get_values(self, varname: str, burn=0, thin=1) -> np.ndarray: @@ -101,8 +102,12 @@ def _slice(self, idx: slice) -> "IBaseTrace": raise NotImplementedError() def point(self, idx: int) -> dict[str, np.ndarray]: - """Return dictionary of point values at `idx` for current chain - with variables names as keys. + """Return point values at `idx` for current chain. + + Returns + ------- + values : dict[str, np.ndarray] + Dictionary of values with variable names as keys. """ raise NotImplementedError() @@ -127,7 +132,7 @@ def close(self): class BaseTrace(IBaseTrace): - """Base trace object + """Base trace object. 
Parameters ---------- @@ -208,6 +213,7 @@ def setup(self, draws, chain, sampler_vars=None) -> None: # Selection methods def __getitem__(self, idx): + """Get the sample at index `idx`.""" if isinstance(idx, slice): return self._slice(idx) @@ -339,6 +345,7 @@ def __init__(self, straces: Sequence[IBaseTrace]): self._report = SamplerReport() def __repr__(self): + """Return a string representation of MultiTrace.""" template = "<{}: {} chains, {} iterations, {} variables>" return template.format(self.__class__.__name__, self.nchains, len(self), len(self.varnames)) @@ -355,9 +362,11 @@ def report(self) -> SamplerReport: return self._report def __iter__(self): + """Return an iterator of the MultiTrace.""" raise NotImplementedError def __getitem__(self, idx): + """Get the sample at index `idx`.""" if isinstance(idx, slice): return self._slice(idx) @@ -393,6 +402,7 @@ def __getitem__(self, idx): _attrs = {"_straces", "varnames", "chains", "stat_names", "_report"} def __getattr__(self, name): + """Get the value of the attribute of name `name`.""" # Avoid infinite recursion when called before __init__ # variables are set up (e.g., when pickling). if name in self._attrs: @@ -412,6 +422,7 @@ def __getattr__(self, name): raise AttributeError(f"'{type(self).__name__}' object has no attribute '{name}'") def __len__(self): + """Length of the chains.""" chain = self.chains[-1] return len(self._straces[chain]) @@ -546,7 +557,7 @@ def point(self, idx: int, chain: int | None = None) -> dict[str, np.ndarray]: return self._straces[chain].point(idx) def points(self, chains=None): - """Return an iterator over all or some of the sample points + """Return an iterator over all or some of the sample points. Parameters ---------- @@ -561,8 +572,7 @@ def points(self, chains=None): def _squeeze_cat(results, combine: bool, squeeze: bool): - """Squeeze and concatenate the results depending on values of - `combine` and `squeeze`.""" + """Squeeze and/or concatenate the results.""" if combine: results = np.concatenate(results) if not squeeze: diff --git a/pymc/backends/mcbackend.py b/pymc/backends/mcbackend.py index 3e6dd8ba8..3d2c8fd9e 100644 --- a/pymc/backends/mcbackend.py +++ b/pymc/backends/mcbackend.py @@ -43,7 +43,7 @@ def find_data(pmodel: Model) -> list[mcb.DataVariable]: - """Extracts data variables from a model.""" + """Extract data variables from a model.""" observed_rvs = {pmodel.rvs_to_values[rv] for rv in pmodel.observed_RVs} dvars = [] # All data containers are named vars! @@ -124,13 +124,14 @@ def record(self, draw: Mapping[str, np.ndarray], stats: Sequence[Mapping[str, An return self._chain.append(value_dict, stats_dict) def __len__(self): + """Length of the chain.""" return len(self._chain) def get_values(self, varname: str, burn=0, thin=1) -> np.ndarray: return self._chain.get_draws(varname, slice(burn, None, thin)) def _get_stats(self, fname: str, slc: slice) -> np.ndarray: - """Wraps `self._chain.get_stats` but unpickles automatically.""" + """Wrap `self._chain.get_stats` but unpickle automatically.""" values = self._chain.get_stats(fname, slc) # Unpickle object stats if fname in self._statsbj.object_stats: diff --git a/pymc/backends/ndarray.py b/pymc/backends/ndarray.py index cf57f9805..98a11fdec 100644 --- a/pymc/backends/ndarray.py +++ b/pymc/backends/ndarray.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""NumPy array trace backend +"""NumPy array trace backend. Store sampling values in memory as a NumPy array. 
""" @@ -27,7 +27,7 @@ class NDArray(base.BaseTrace): - """NDArray trace object + """NDArray trace object. Parameters ---------- @@ -138,6 +138,7 @@ def close(self): # Selection methods def __len__(self): + """Length of the chain.""" if not self.samples: # `setup` has not been called. return 0 return self.draw_idx @@ -183,8 +184,12 @@ def _slice(self, idx: slice): return sliced def point(self, idx) -> dict[str, Any]: - """Return dictionary of point values at `idx` for current chain - with variable names as keys. + """Return point values at `idx` for current chain. + + Returns + ------- + values : dict[str, Any] + Dictionary of values with variable names as keys. """ idx = int(idx) return {varname: values[idx] for varname, values in self.samples.items()} @@ -212,7 +217,7 @@ def _slice_as_ndarray(strace, idx): def point_list_to_multitrace( point_list: list[dict[str, np.ndarray]], model: Model | None = None ) -> MultiTrace: - """transform point list into MultiTrace""" + """Transform point list into MultiTrace.""" _model = modelcontext(model) varnames = list(point_list[0].keys()) with _model: diff --git a/pymc/backends/report.py b/pymc/backends/report.py index 49e584a97..9a630ee24 100644 --- a/pymc/backends/report.py +++ b/pymc/backends/report.py @@ -43,7 +43,7 @@ def ok(self): @property def n_tune(self) -> int | None: - """Number of tune iterations - not necessarily kept in trace!""" + """Number of tune iterations - not necessarily kept in trace.""" return self._n_tune @property diff --git a/pymc/blocking.py b/pymc/blocking.py index 287a06d53..dcbfe0ead 100644 --- a/pymc/blocking.py +++ b/pymc/blocking.py @@ -12,11 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -""" -pymc.blocking - -Classes for working with subsets of parameters. -""" +"""Classes for working with subsets of parameters.""" from __future__ import annotations @@ -51,9 +47,7 @@ class RaveledVars(NamedTuple): class Compose(Generic[T]): - """ - Compose two functions in a pickleable way - """ + """Compose two functions in a pickleable way.""" def __init__(self, fa: Callable[[PointType], T], fb: Callable[[RaveledVars], PointType]): self.fa = fa diff --git a/pymc/data.py b/pymc/data.py index a0d6893cb..247825981 100644 --- a/pymc/data.py +++ b/pymc/data.py @@ -53,7 +53,7 @@ def get_data(filename): - """Returns a BytesIO object for a package data file. + """Return a BytesIO object for a package data file. Parameters ---------- @@ -87,9 +87,9 @@ def clone(self): class GeneratorAdapter: - """ - Helper class that helps to infer data type of generator with looking - at the first item, preserving the order of the resulting generator + """Class that helps infer data type of generator. + + It looks at the first item, preserving the order of the resulting generator. 
""" def make_variable(self, gop, name=None): @@ -108,6 +108,7 @@ def __init__(self, generator): # python3 generator def __next__(self): + """Next value in the generator.""" if not self._yielded_test_value: self._yielded_test_value = True return self.test_value @@ -118,12 +119,15 @@ def __next__(self): next = __next__ def __iter__(self): + """Return an iterator.""" return self def __eq__(self, other): + """Return true if both objects are actually the same.""" return id(self) == id(other) def __hash__(self): + """Return a hash of the object.""" return hash(id(self)) @@ -135,7 +139,7 @@ class MinibatchIndexRV(IntegersRV): class MinibatchOp(OpFromGraph): - """Encapsulate Minibatch random draws in an opaque OFG""" + """Encapsulate Minibatch random draws in an opaque OFG.""" def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs, inline=True) @@ -186,7 +190,6 @@ def Minibatch(variable: TensorVariable, *variables: TensorVariable, batch_size: >>> data2 = np.random.randn(100, 20) >>> mdata1, mdata2 = Minibatch(data1, data2, batch_size=10) """ - if not isinstance(batch_size, int): raise TypeError("batch_size must be an integer") @@ -221,7 +224,7 @@ def determine_coords( dims: Sequence[str | None] | None = None, coords: dict[str, Sequence | np.ndarray] | None = None, ) -> tuple[dict[str, Sequence | np.ndarray], Sequence[str | None]]: - """Determines coordinate values from data or the model (via ``dims``).""" + """Determine coordinate values from data or the model (via ``dims``).""" if coords is None: coords = {} @@ -340,7 +343,7 @@ def Data( mutable: bool | None = None, **kwargs, ) -> SharedVariable | TensorConstant: - """Data container that registers a data variable with the model. + """Create a data container that registers a data variable with the model. Depending on the ``mutable`` setting (default: True), the variable is registered as a :class:`~pytensor.compile.sharedvalue.SharedVariable`, diff --git a/pymc/distributions/__init__.py b/pymc/distributions/__init__.py index d5b23bfba..4d2088356 100644 --- a/pymc/distributions/__init__.py +++ b/pymc/distributions/__init__.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +"""Probability distributions.""" + from pymc.distributions.censored import Censored from pymc.distributions.continuous import ( AsymmetricLaplace, diff --git a/pymc/distributions/censored.py b/pymc/distributions/censored.py index ed11c633a..4be21b1c9 100644 --- a/pymc/distributions/censored.py +++ b/pymc/distributions/censored.py @@ -33,7 +33,7 @@ class CensoredRV(SymbolicRandomVariable): - """Censored random variable""" + """Censored random variable.""" inline_logprob = True extended_signature = "(),(),()->()" @@ -61,7 +61,7 @@ def rv_op(cls, dist, lower, upper, *, size=None): class Censored(Distribution): r""" - Censored distribution + Censored distribution. The pdf of a censored distribution is diff --git a/pymc/distributions/continuous.py b/pymc/distributions/continuous.py index 6e68d98bf..d72346ff0 100644 --- a/pymc/distributions/continuous.py +++ b/pymc/distributions/continuous.py @@ -14,11 +14,7 @@ # Contains code from AePPL, Copyright (c) 2021-2022, Aesara Developers. -# coding: utf-8 -""" -A collection of common probability distributions for stochastic -nodes in PyMC. 
-""" +"""A collection of common probability distributions for stochastic nodes in PyMC.""" import warnings @@ -137,19 +133,19 @@ def polyagamma_cdf(*args, **kwargs): class PositiveContinuous(Continuous): - """Base class for positive continuous distributions""" + """Base class for positive continuous distributions.""" class UnitContinuous(Continuous): - """Base class for continuous distributions on [0,1]""" + """Base class for continuous distributions on [0,1].""" class CircularContinuous(Continuous): - """Base class for circular continuous distributions""" + """Base class for circular continuous distributions.""" class BoundedContinuous(Continuous): - """Base class for bounded continuous distributions""" + """Base class for bounded continuous distributions.""" # Indices of the arguments that define the lower and upper bounds of the distribution bound_args_indices: list[int] | None = None @@ -216,8 +212,10 @@ def assert_negative_support(var, label, distname, value=-1e-6): def get_tau_sigma(tau=None, sigma=None): r""" - Find precision and standard deviation. The link between the two - parameterizations is given by the inverse relationship: + Find precision and standard deviation. + + The link between the two parameterizations is given by the inverse + relationship: .. math:: \tau = \frac{1}{\sigma^2} @@ -370,10 +368,7 @@ def rng_fn(cls, rng, size): class Flat(Continuous): - """ - Uninformative log-likelihood that returns 0 regardless of - the passed value. - """ + """Uninformative log-likelihood that returns 0 regardless of the passed value.""" rv_op = flat @@ -3766,8 +3761,7 @@ def rng_fn(cls, rng, x, pdf, cdf, size=None) -> np.ndarray: class Interpolated(BoundedContinuous): r""" - Univariate probability distribution defined as a linear interpolation - of probability density function evaluated on some lattice of points. + Univariate linear interpolation of pdf evaluated on some lattice of points. The lattice can be uneven, so the steps between different points can have different size and it is possible to vary the precision between regions @@ -3837,9 +3831,7 @@ def dist(cls, x_points, pdf_points, *args, **kwargs): return super().dist([x_points, pdf_points, cdf_points], **kwargs) def support_point(rv, size, x_points, pdf_points, cdf_points): - """ - Estimates the expectation integral using the trapezoid rule; cdf_points are not used. - """ + """Estimates the expectation integral using the trapezoid rule; cdf_points are not used.""" x_fx = pt.mul(x_points, pdf_points) # x_i * f(x_i) for all xi's in x_points support_point = ( pt.sum(pt.mul(pt.diff(x_points, axis=-1), x_fx[..., 1:] + x_fx[..., :-1])) / 2 @@ -3993,7 +3985,7 @@ def __call__(self, h=1.0, z=0.0, size=None, **kwargs): @classmethod def rng_fn(cls, rng, h, z, size=None) -> np.ndarray: """ - Generate a random sample from the distribution with the given parameters + Generate a random sample from the distribution with the given parameters. Parameters ---------- diff --git a/pymc/distributions/custom.py b/pymc/distributions/custom.py index 2e9a68870..3238680bb 100644 --- a/pymc/distributions/custom.py +++ b/pymc/distributions/custom.py @@ -73,7 +73,7 @@ def default_support_point(rv, size, *rv_inputs, rv_name=None, has_fallback=False class CustomDistRV(RandomVariable): """ - Base class for CustomDistRV + Base class for CustomDistRV. This should be subclassed when defining CustomDist objects. 
""" @@ -89,7 +89,7 @@ def rng_fn(cls, rng, *args): class _CustomDist(Distribution): - """A distribution that returns a subclass of CustomDistRV""" + """A distribution that returns a subclass of CustomDistRV.""" rv_type = CustomDistRV @@ -194,7 +194,7 @@ def custom_dist_support_point(op, rv, rng, size, *dist_params): class CustomSymbolicDistRV(SymbolicRandomVariable): """ - Base class for CustomSymbolicDist + Base class for CustomSymbolicDist. This should be subclassed when defining custom CustomDist objects that have symbolic random methods. @@ -363,7 +363,6 @@ def change_custom_dist_size(op, rv, new_size, expand): @staticmethod def _infer_final_signature(signature: str, n_inputs, n_outputs, n_rngs) -> str: """Add size and updates to user provided gufunc signature if they are missing.""" - # Regex to split across outer commas # Copied from https://stackoverflow.com/a/26634150 outer_commas = re.compile(r",\s*(?![^()]*\))") @@ -460,7 +459,7 @@ def dist_support_point(op, rv, *args): class CustomDist: - """A helper class to create custom distributions + """A helper class to create custom distributions. This class can be used to wrap black-box random and logp methods for use in forward and mcmc sampling. @@ -511,9 +510,9 @@ class CustomDist: A callable that calculates the log probability of some given ``value`` conditioned on certain distribution parameter values. It must have the following signature: ``logp(value, *dist_params)``, where ``value`` is - an PyTensor tensor that represents the distribution value, and ``dist_params`` + a PyTensor tensor that represents the distribution value, and ``dist_params`` are the tensors that hold the values of the distribution parameters. - This function must return an PyTensor tensor. + This function must return a PyTensor tensor. When the `dist` function is specified, PyMC will try to automatically infer the `logp` when this is not provided. @@ -524,9 +523,9 @@ class CustomDist: A callable that calculates the log cumulative log probability of some given ``value`` conditioned on certain distribution parameter values. It must have the following signature: ``logcdf(value, *dist_params)``, where ``value`` is - an PyTensor tensor that represents the distribution value, and ``dist_params`` + a PyTensor tensor that represents the distribution value, and ``dist_params`` are the tensors that hold the values of the distribution parameters. - This function must return an PyTensor tensor. If ``None``, a ``NotImplementedError`` + This function must return a PyTensor tensor. If ``None``, a ``NotImplementedError`` will be raised when trying to compute the distribution's logcdf. support_point : Optional[Callable] A callable that can be used to compute the finete logp point of the distribution. @@ -551,7 +550,7 @@ class CustomDist: When specified, `ndim_supp` and `ndims_params` are not needed. See examples below. dtype : str The dtype of the distribution. All draws and observations passed into the - distribution will be cast onto this dtype. This is not needed if an PyTensor + distribution will be cast onto this dtype. This is not needed if a PyTensor dist function is provided, which should already return the right dtype! class_name : str Name for the class which will wrap the CustomDist methods. 
When not specified, diff --git a/pymc/distributions/discrete.py b/pymc/distributions/discrete.py index 93f318feb..179bae25f 100644 --- a/pymc/distributions/discrete.py +++ b/pymc/distributions/discrete.py @@ -293,7 +293,7 @@ def logcdf(value, n, alpha, beta): class Bernoulli(Discrete): - R"""Bernoulli log-likelihood + R"""Bernoulli log-likelihood. The Bernoulli distribution describes the probability of successes (x=1) and failures (x=0). diff --git a/pymc/distributions/dist_math.py b/pymc/distributions/dist_math.py index 32b61e2f6..1cdb3b294 100644 --- a/pymc/distributions/dist_math.py +++ b/pymc/distributions/dist_math.py @@ -13,7 +13,7 @@ # limitations under the License. """ -Created on Mar 7, 2011 +Created on Mar 7, 2011. @author: johnsalvatier """ @@ -90,9 +90,7 @@ def check_icdf_value(expr: Variable, value: Variable) -> Variable: def logpow(x, m): - """ - Calculates log(x**m) since m*log(x) will fail when m, x = 0. - """ + """Calculate log(x**m) since m*log(x) will fail when m, x = 0.""" # return m * log(x) return pt.switch(pt.eq(x, 0), pt.switch(pt.eq(m, 0), 0.0, -np.inf), m * pt.log(x)) @@ -110,9 +108,7 @@ def betaln(x, y): def std_cdf(x): - """ - Calculates the standard normal cumulative distribution function. - """ + """Calculate the standard normal cumulative distribution function.""" return 0.5 + 0.5 * pt.erf(x / pt.sqrt(2.0)) @@ -136,7 +132,7 @@ def normal_lccdf(mu, sigma, x): def log_diff_normal_cdf(mu, sigma, x, y): - """ + r""" Compute :math:`\\log(\\Phi(\frac{x - \\mu}{\\sigma}) - \\Phi(\frac{y - \\mu}{\\sigma}))` safely in log space. Parameters @@ -176,16 +172,18 @@ def log_diff_normal_cdf(mu, sigma, x, y): def sigma2rho(sigma): + """Convert `sigma` into `rho` with PyTensor. + + :math:`mu + sigma*e = mu + log(1+exp(rho))*e`. """ - `sigma -> rho` PyTensor converter - :math:`mu + sigma*e = mu + log(1+exp(rho))*e`""" return pt.log(pt.exp(pt.abs(sigma)) - 1.0) def rho2sigma(rho): + """Convert `rho` to `sigma` with PyTensor. + + :math:`mu + sigma*e = mu + log(1+exp(rho))*e`. """ - `rho -> sigma` PyTensor converter - :math:`mu + sigma*e = mu + log(1+exp(rho))*e`""" return pt.softplus(rho) @@ -195,8 +193,7 @@ def rho2sigma(rho): def log_normal(x, mean, **kwargs): """ - Calculate logarithm of normal distribution at point `x` - with given `mean` and `std` + Calculate logarithm of normal distribution at point `x` with given `mean` and `std`. Parameters ---------- @@ -239,9 +236,7 @@ def log_normal(x, mean, **kwargs): class SplineWrapper(Op): - """ - Creates an PyTensor operation from scipy.interpolate.UnivariateSpline - """ + """Creates a PyTensor operation from scipy.interpolate.UnivariateSpline.""" __props__ = ("spline",) @@ -276,9 +271,7 @@ def grad(self, inputs, grads): class I1e(UnaryScalarOp): - """ - Modified Bessel function of the first kind of order 1, exponentially scaled. - """ + """Modified Bessel function of the first kind of order 1, exponentially scaled.""" nfunc_spec = ("scipy.special.i1e", 1, 1) @@ -291,9 +284,7 @@ def impl(self, x): class I0e(UnaryScalarOp): - """ - Modified Bessel function of the first kind of order 0, exponentially scaled. - """ + """Modified Bessel function of the first kind of order 0, exponentially scaled.""" nfunc_spec = ("scipy.special.i0e", 1, 1) @@ -311,7 +302,7 @@ def grad(self, inp, grads): def random_choice(p, size): - """Return draws from categorical probability functions + """Return draws from categorical probability functions. 
Parameters ---------- @@ -350,9 +341,7 @@ def random_choice(p, size): def zvalue(value, sigma, mu): - """ - Calculate the z-value for a normal distribution. - """ + """Calculate the z-value for a normal distribution.""" return (value - mu) / sigma @@ -397,7 +386,7 @@ def clipped_beta_rvs(a, b, size=None, random_state=None, dtype="float64"): def multigammaln(a, p): - """Multivariate Log Gamma + """Multivariate Log Gamma. Parameters ---------- @@ -410,9 +399,7 @@ def multigammaln(a, p): def log_i0(x): - """ - Calculates the logarithm of the 0 order modified Bessel function of the first kind"" - """ + """Calculate the logarithm of the 0 order modified Bessel function of the first kind.""" return pt.switch( pt.lt(x, 5), pt.log1p( diff --git a/pymc/distributions/distribution.py b/pymc/distributions/distribution.py index 1b11f18ba..21d3a4d29 100644 --- a/pymc/distributions/distribution.py +++ b/pymc/distributions/distribution.py @@ -86,8 +86,7 @@ class _Unpickling: class DistributionMeta(ABCMeta): """ - DistributionMeta class - + DistributionMeta class. Notes ----- @@ -194,8 +193,9 @@ def support_point(op, rv, *dist_params): class _class_or_instancemethod(classmethod): - """Allow a method to be called both as a classmethod and an instancemethod, - giving priority to the instancemethod. + """Allow a method to be called both as a classmethod and an instancemethod. + + Priority is given to the instancemethod. This is used to allow extracting information from the signature of a SymbolicRandomVariable which may be provided either as a class attribute or as an instance attribute. @@ -209,7 +209,7 @@ def __get__(self, instance, type_): class SymbolicRandomVariable(MeasurableOp, OpFromGraph): - """Symbolic Random Variable + """Symbolic Random Variable. This is a subclasse of `OpFromGraph` which is used to encapsulate the symbolic random graph of complex distributions which are built on top of pure @@ -270,7 +270,7 @@ def ndims_params(cls_or_self) -> Sequence[int] | None: @_class_or_instancemethod @property def ndim_supp(cls_or_self) -> int | None: - """Number of support dimensions of the RandomVariable + """Number of support dimensions of the RandomVariable. (0 for scalar, 1 for vector, ...) 
""" @@ -309,7 +309,7 @@ def default_output(cls_or_self) -> int | None: def get_input_output_type_idxs( extended_signature: str | None, ) -> tuple[tuple[tuple[int], int | None, tuple[int]], tuple[tuple[int], tuple[int]]]: - """Parse extended_signature and return indexes for *[rng], [size] and parameters as well as outputs""" + """Parse extended_signature and return indexes for *[rng], [size] and parameters as well as outputs.""" if extended_signature is None: raise ValueError("extended_signature must be provided") @@ -341,17 +341,17 @@ def get_input_output_type_idxs( ) def rng_params(self, node) -> tuple[Variable, ...]: - """Extract the rng parameters from the node's inputs""" + """Extract the rng parameters from the node's inputs.""" [rng_args_idxs, _, _], _ = self.get_input_output_type_idxs(self.extended_signature) return tuple(node.inputs[i] for i in rng_args_idxs) def size_param(self, node) -> Variable | None: - """Extract the size parameter from the node's inputs""" + """Extract the size parameter from the node's inputs.""" [_, size_arg_idx, _], _ = self.get_input_output_type_idxs(self.extended_signature) return node.inputs[size_arg_idx] if size_arg_idx is not None else None def dist_params(self, node) -> tuple[Variable, ...]: - """Extract distribution parameters from the node's inputs""" + """Extract distribution parameters from the node's inputs.""" [_, _, param_args_idxs], _ = self.get_input_output_type_idxs(self.extended_signature) return tuple(node.inputs[i] for i in param_args_idxs) @@ -384,7 +384,7 @@ def __init__( super().__init__(*args, **kwargs) def update(self, node: Apply) -> dict[Variable, Variable]: - """Symbolic update expression for input random state variables + """Symbolic update expression for input random state variables. Returns a dictionary with the symbolic expressions required for correct updating of random state input variables repeated function evaluations. This is used by @@ -393,7 +393,7 @@ def update(self, node: Apply) -> dict[Variable, Variable]: return collect_default_updates_inner_fgraph(node) def batch_ndim(self, node: Apply) -> int: - """Number of dimensions of the distribution's batch shape.""" + """Return the number of dimensions of the distribution's batch shape.""" out_ndim = max(getattr(out.type, "ndim", 0) for out in node.outputs) return out_ndim - self.ndim_supp @@ -421,7 +421,7 @@ def change_symbolic_rv_size(op: SymbolicRandomVariable, rv, new_size, expand) -> class Distribution(metaclass=DistributionMeta): - """Statistical distribution""" + """Statistical distribution.""" rv_op: [RandomVariable, SymbolicRandomVariable] = None rv_type: MetaType = None @@ -439,7 +439,7 @@ def __new__( default_transform=UNSET, **kwargs, ) -> TensorVariable: - """Adds a tensor variable corresponding to a PyMC distribution to the current model. + """Add a tensor variable corresponding to a PyMC distribution to the current model. Note that all remaining kwargs must be compatible with ``.dist()`` @@ -477,7 +477,6 @@ def __new__( rv : TensorVariable The created random variable tensor, registered in the Model. """ - try: from pymc.model import Model @@ -533,7 +532,7 @@ def dist( shape: Shape | None = None, **kwargs, ) -> TensorVariable: - """Creates a tensor variable corresponding to the `cls` distribution. + """Create a tensor variable corresponding to the `cls` distribution. 
Parameters ---------- @@ -582,10 +581,7 @@ def dist( @node_rewriter([SymbolicRandomVariable]) def inline_symbolic_random_variable(fgraph, node): - """ - Optimization that expands the internal graph of a SymbolicRV when obtaining the logp - graph, if the flag `inline_logprob` is True. - """ + """Expand a SymbolicRV when obtaining the logp graph if `inline_logprob` is True.""" op = node.op if op.inline_logprob: return clone_replace(op.inner_outputs, dict(zip(op.inner_inputs, node.inputs))) @@ -606,8 +602,7 @@ def _support_point(op, rv, *rv_inputs) -> TensorVariable: def support_point(rv: TensorVariable) -> TensorVariable: - """Method for choosing a representative point/value - that can be used to start optimization or MCMC sampling. + """Choose a representative point/value that can be used to start optimization or MCMC sampling. The only parameter to this function is the RandomVariable for which the value is to be derived. @@ -632,7 +627,7 @@ def moment(rv: TensorVariable) -> TensorVariable: class Discrete(Distribution): - """Base class for discrete distributions""" + """Base class for discrete distributions.""" def __new__(cls, name, *args, **kwargs): if kwargs.get("transform", None): @@ -642,7 +637,7 @@ def __new__(cls, name, *args, **kwargs): class Continuous(Distribution): - """Base class for continuous distributions""" + """Base class for continuous distributions.""" class DiracDeltaRV(SymbolicRandomVariable): diff --git a/pymc/distributions/mixture.py b/pymc/distributions/mixture.py index 36cd1c397..dc704e512 100644 --- a/pymc/distributions/mixture.py +++ b/pymc/distributions/mixture.py @@ -163,7 +163,7 @@ def update(self, node: Apply): class Mixture(Distribution): R""" - Mixture log-likelihood + Mixture log-likelihood. Often used to model subpopulation heterogeneity @@ -493,7 +493,7 @@ def mixture_args_fn(rng, weights, *components): class NormalMixture: R""" - Normal mixture log-likelihood + Normal mixture log-likelihood. .. math:: @@ -555,9 +555,9 @@ def dist(cls, w, mu, sigma=None, tau=None, **kwargs): def _zero_inflated_mixture(*, name, nonzero_p, nonzero_dist, **kwargs): - """Helper function to create a zero-inflated mixture + """Create a zero-inflated mixture (helper function). - If name is `None`, this function returns an unregistered variable + If name is `None`, this function returns an unregistered variable. """ nonzero_p = pt.as_tensor_variable(nonzero_p) weights = pt.stack([1 - nonzero_p, nonzero_p], axis=-1) @@ -702,10 +702,11 @@ def dist(cls, psi, n, p, **kwargs): class ZeroInflatedNegativeBinomial: R""" Zero-Inflated Negative binomial log-likelihood. + The Zero-inflated version of the Negative Binomial (NB). The NB distribution describes a Poisson random variable whose rate parameter is gamma distributed. - The pmf of this distribution is + The pmf of this distribution is. .. math:: @@ -799,7 +800,7 @@ def dist(cls, psi, mu=None, alpha=None, p=None, n=None, **kwargs): def _hurdle_mixture(*, name, nonzero_p, nonzero_dist, dtype, max_n_steps=10_000, **kwargs): - """Helper function to create a hurdle mixtures + """Create a hurdle mixtures (helper function). If name is `None`, this function returns an unregistered variable diff --git a/pymc/distributions/multivariate.py b/pymc/distributions/multivariate.py index 5c99bc667..55e275be3 100644 --- a/pymc/distributions/multivariate.py +++ b/pymc/distributions/multivariate.py @@ -12,9 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-#!/usr/bin/env python -# -*- coding: utf-8 -*- - import warnings from functools import partial, reduce @@ -118,7 +115,7 @@ def _squeeze_to_ndim(var: TensorVariable | np.ndarray, ndim: int): class SimplexContinuous(Continuous): - """Base class for simplex continuous distributions""" + """Base class for simplex continuous distributions.""" @_default_transform.register(SimplexContinuous) @@ -282,8 +279,7 @@ def support_point(rv, size, mu, cov): def logp(value, mu, cov): """ - Calculate log-probability of Multivariate Normal distribution - at specified value. + Calculate logp of Multivariate Normal distribution at specified value. Parameters ---------- @@ -345,7 +341,7 @@ def precision_mv_normal_logp(op: PrecisionMvNormalRV, value, rng, size, mean, ta @node_rewriter(tracks=[MvNormalRV]) def mv_normal_to_precision_mv_normal(fgraph, node): - """Replaces MvNormal(mu, inv(tau)) -> PrecisionMvNormal(mu, tau) + """Replace MvNormal(mu, inv(tau)) -> PrecisionMvNormal(mu, tau). This is introduced in logprob rewrites to provide a more efficient logp for a MvNormal that is defined by a precision matrix. @@ -353,7 +349,6 @@ def mv_normal_to_precision_mv_normal(fgraph, node): Note: This won't be introduced when calling `pm.logp` as that will dispatch directly without triggering the logprob rewrites. """ - rng, size, mu, cov = node.inputs if cov.owner and cov.owner.op == matrix_inverse: tau = cov.owner.inputs[0] @@ -473,8 +468,7 @@ def support_point(rv, size, nu, mu, scale): def logp(value, nu, mu, scale): """ - Calculate log-probability of Multivariate Student's T distribution - at specified value. + Calculate logp of Multivariate Student's T distribution at specified value. Parameters ---------- @@ -539,8 +533,7 @@ def support_point(rv, size, a): def logp(value, a): """ - Calculate log-probability of Dirichlet distribution - at specified value. + Calculate logp of Dirichlet distribution at specified value. Parameters ---------- @@ -646,8 +639,7 @@ def support_point(rv, size, n, p): def logp(value, n, p): """ - Calculate log-probability of Multinomial distribution - at specified value. + Calculate logp of Multinomial distribution at specified value. Parameters ---------- @@ -658,7 +650,6 @@ def logp(value, n, p): ------- TensorVariable """ - res = factln(n) + pt.sum(-factln(value) + logpow(p, value), axis=-1) res = pt.switch( pt.or_(pt.any(pt.lt(value, 0), axis=-1), pt.neq(pt.sum(value, axis=-1), n)), @@ -741,8 +732,7 @@ def support_point(rv, size, n, a): def logp(value, n, a): """ - Calculate log-probability of DirichletMultinomial distribution - at specified value. + Calculate logp of DirichletMultinomial distribution at specified value. Parameters ---------- @@ -778,6 +768,7 @@ def logp(value, n, a): class _OrderedMultinomial(Multinomial): r""" Underlying class for ordered multinomial distributions. + See docs for the OrderedMultinomial wrapper class for more details on how to use it in models. """ @@ -900,10 +891,7 @@ def posdef(AA): class PosDefMatrix(Op): - """ - Check if input is positive definite. Input should be a square matrix. - - """ + """Check if input is positive definite. Input should be a square matrix.""" # Properties attribute __props__ = () @@ -1021,8 +1009,7 @@ def dist(cls, nu, V, *args, **kwargs): def logp(X, nu, V): """ - Calculate log-probability of Wishart distribution - at specified value. + Calculate logp of Wishart distribution at specified value. 
Parameters ---------- @@ -1033,7 +1020,6 @@ def logp(X, nu, V): ------- TensorVariable """ - p = V.shape[0] IVI = det(V) @@ -1056,9 +1042,10 @@ def logp(X, nu, V): def WishartBartlett(name, S, nu, is_cholesky=False, return_cholesky=False, initval=None): r""" - Bartlett decomposition of the Wishart distribution. As the Wishart - distribution requires the matrix to be symmetric positive semi-definite - it is impossible for MCMC to ever propose acceptable matrices. + Bartlett decomposition of the Wishart distribution. + + As the Wishart distribution requires the matrix to be symmetric positive + semi-definite, it is impossible for MCMC to ever propose acceptable matrices. Instead, we can use the Barlett decomposition which samples a lower diagonal matrix. Specifically: @@ -1101,7 +1088,6 @@ def WishartBartlett(name, S, nu, is_cholesky=False, return_cholesky=False, initv This distribution is usually a bad idea to use as a prior for multivariate normal. You should instead use LKJCholeskyCov or LKJCorr. """ - L = S if is_cholesky else scipy.linalg.cholesky(S) diag_idx = np.diag_indices_from(S) tril_idx = np.tril_indices_from(S, k=-1) @@ -1258,6 +1244,7 @@ def update(self, node): class _LKJCholeskyCov(Distribution): r"""Underlying class for covariance matrix with LKJ distributed correlations. + See docs for LKJCholeskyCov function for more details on how to use it in models. """ @@ -1609,8 +1596,7 @@ def support_point(rv, *args): def logp(value, n, eta): """ - Calculate log-probability of LKJ distribution at specified - value. + Calculate logp of LKJ distribution at specified value. Parameters ---------- @@ -1621,7 +1607,6 @@ def logp(value, n, eta): ------- TensorVariable """ - if value.ndim > 1: raise NotImplementedError("LKJCorr logp is only implemented for vector values (ndim=1)") @@ -1911,8 +1896,7 @@ def support_point(rv, size, mu, rowchol, colchol): def logp(value, mu, rowchol, colchol): """ - Calculate log-probability of Matrix-valued Normal distribution - at specified value. + Calculate logp of Matrix-valued Normal distribution at specified value. Parameters ---------- @@ -1923,7 +1907,6 @@ def logp(value, mu, rowchol, colchol): ------- TensorVariable """ - if value.ndim != 2: raise ValueError("Value must be two dimensional.") @@ -2095,8 +2078,7 @@ def support_point(rv, rng, size, mu, sigma, *covs): def logp(value, rng, size, mu, sigma, *covs): """ - Calculate log-probability of Multivariate Normal distribution - with Kronecker-structured covariance at specified value. + Calculate logp of Multivariate Normal distribution with Kronecker-structured covariance at specified value. Parameters ---------- @@ -2165,11 +2147,12 @@ def make_node(self, rng, size, mu, W, alpha, tau, W_is_valid): @classmethod def rng_fn(cls, rng: np.random.RandomState, mu, W, alpha, tau, W_is_valid, size): - """ + """Sample a numeric random variate. + Implementation of algorithm from paper Havard Rue, 2001. "Fast sampling of Gaussian Markov random fields," Journal of the Royal Statistical Society Series B, Royal Statistical Society, - vol. 63(2), pages 325-338. DOI: 10.1111/1467-9868.00288 + vol. 63(2), pages 325-338. DOI: 10.1111/1467-9868.00288. """ if not W_is_valid.all(): raise ValueError("W must be a valid adjacency matrix") @@ -2220,8 +2203,10 @@ def rng_fn(cls, rng: np.random.RandomState, mu, W, alpha, tau, W_is_valid, size) class CAR(Continuous): r""" - Likelihood for a conditional autoregression. This is a special case of the - multivariate normal with an adjacency-structured covariance matrix. 
+ Likelihood for a conditional autoregression. + + This is a special case of the multivariate normal with an + adjacency-structured covariance matrix. .. math:: @@ -2282,8 +2267,9 @@ def support_point(rv, size, mu, W, alpha, tau, W_is_valid): def logp(value, mu, W, alpha, tau, W_is_valid): """ - Calculate log-probability of a CAR-distributed vector - at specified value. This log probability function differs from + Calculate logp of a CAR-distributed vector at specified value. + + This log probability function differs from the true CAR log density (AKA a multivariate normal with CAR-structured covariance matrix) by an additive constant. @@ -2296,7 +2282,6 @@ def logp(value, mu, W, alpha, tau, W_is_valid): ------- TensorVariable """ - # If expand_dims were added to (a potentially sparse) W, retrieve the non-expanded W extra_dims = W.type.ndim - 2 if extra_dims: @@ -2368,9 +2353,10 @@ def rng_fn(cls, rng, size, W, sigma, zero_sum_stdev): class ICAR(Continuous): r""" - The intrinsic conditional autoregressive prior. It is primarily used to model - covariance between neighboring areas. It is a special case - of the :class:`~pymc.CAR` distribution where alpha is set to 1. + The intrinsic conditional autoregressive prior. + + It is primarily used to model covariance between neighboring areas. It is a + special case of the :class:`~pymc.CAR` distribution where alpha is set to 1. The log probability density function is @@ -2553,7 +2539,9 @@ def rng_fn(cls, rng, alpha, K, size): class StickBreakingWeights(SimplexContinuous): r""" - Likelihood of truncated stick-breaking weights. The weights are generated from a + Likelihood of truncated stick-breaking weights. + + The weights are generated from a stick-breaking proceduce where :math:`x_k = v_k \prod_{\ell < k} (1 - v_\ell)` for :math:`k \in \{1, \ldots, K\}` and :math:`x_K = \prod_{\ell = 1}^{K} (1 - v_\ell) = 1 - \sum_{\ell=1}^K x_\ell` with :math:`v_k \stackrel{\text{i.i.d.}}{\sim} \text{Beta}(1, \alpha)`. @@ -2617,8 +2605,7 @@ def support_point(rv, size, alpha, K): def logp(value, alpha, K): """ - Calculate log-probability of the distribution induced from the stick-breaking process - at specified value. + Calculate logp of the distribution induced from the stick-breaking process at specified value. Parameters ---------- @@ -2665,7 +2652,7 @@ def logp(value, alpha, K): class ZeroSumNormalRV(SymbolicRandomVariable): - """ZeroSumNormal random variable""" + """ZeroSumNormal random variable.""" _print_name = ("ZeroSumNormal", "\\operatorname{ZeroSumNormal}") @@ -2700,8 +2687,8 @@ def rv_op(cls, sigma, support_shape, *, size=None, rng=None): class ZeroSumNormal(Distribution): r""" - ZeroSumNormal distribution, i.e Normal distribution where one or - several axes are constrained to sum to zero. + Normal distribution where one or several axes are constrained to sum to zero. + By default, the last axis is constrained to sum to zero. See `n_zerosum_axes` kwarg for more details. diff --git a/pymc/distributions/shape_utils.py b/pymc/distributions/shape_utils.py index 0cedd5da7..09a8c00a2 100644 --- a/pymc/distributions/shape_utils.py +++ b/pymc/distributions/shape_utils.py @@ -12,11 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -# -*- coding: utf-8 -*- -""" -A collection of common shape operations needed for broadcasting -samples from probability distributions for stochastic nodes in PyMC. 
-""" +"""Common shape operations to broadcast samples from probability distributions for stochastic nodes in PyMC.""" import warnings @@ -51,7 +47,7 @@ def to_tuple(shape): - """Convert ints, arrays, and Nones to tuples + """Convert ints, arrays, and Nones to tuples. Parameters ---------- @@ -150,7 +146,7 @@ def convert_size(size: Size) -> StrongSize | None: def shape_from_dims(dims: StrongDims, model) -> StrongShape: - """Determines shape from a `dims` tuple. + """Determine shape from a `dims` tuple. Parameters ---------- @@ -164,7 +160,6 @@ def shape_from_dims(dims: StrongDims, model) -> StrongShape: dims : tuple of (str or None) Names or None for all RV dimensions. """ - # Dims must be known already unknowndim_dims = set(dims) - set(model.dim_lengths) if unknowndim_dims: @@ -180,7 +175,7 @@ def find_size( size: StrongSize | None, ndim_supp: int, ) -> StrongSize | None: - """Determines the size keyword argument for creating a Distribution. + """Determine the size keyword argument for creating a Distribution. Parameters ---------- @@ -197,7 +192,6 @@ def find_size( size : tuble of int or TensorVariable, optional The size argument for creating the Distribution """ - if size is not None: return size @@ -210,7 +204,7 @@ def find_size( def rv_size_is_none(size: TensorVariable | Constant | None) -> bool: - """Check whether an rv size is None (i.e., NoneConst)""" + """Check whether an rv size is None (i.e., NoneConst).""" if size is None: return True return isinstance(size.type, NoneTypeT) @@ -343,7 +337,7 @@ def get_support_shape( support_shape_offset: Sequence[int] | None = None, ndim_supp: int = 1, ) -> TensorVariable | None: - """Extract the support shapes from shape / dims / observed information + """Extract the support shapes from shape / dims / observed information. Parameters ---------- @@ -439,7 +433,11 @@ def get_support_shape_1d( observed: Any | None = None, support_shape_offset: int = 0, ) -> TensorVariable | None: - """Helper function for cases when you just care about one dimension.""" + """ + Extract the support shapes from shape / dims / observed information. + + Helper function for cases when you just care about one dimension. + """ support_shape_tuple = get_support_shape( support_shape=(support_shape,) if support_shape is not None else None, shape=shape, diff --git a/pymc/distributions/simulator.py b/pymc/distributions/simulator.py index dc7700f7d..de5da40a1 100644 --- a/pymc/distributions/simulator.py +++ b/pymc/distributions/simulator.py @@ -34,7 +34,7 @@ class SimulatorRV(RandomVariable): """ - Base class for SimulatorRVs + Base class for SimulatorRVs. This should be subclassed when defining custom Simulator objects. """ @@ -63,8 +63,7 @@ def sum_stat(cls, *args, **kwargs): class Simulator(Distribution): r""" - Simulator distribution, used for Approximate Bayesian Inference (ABC) - with Sequential Monte Carlo (SMC) sampling via :func:`~pymc.sample_smc`. + Used for Approximate Bayesian Inference with SMC sampling via :func:`~pymc.sample_smc`. 
Simulator distributions have a stochastic pseudo-loglikelihood defined by a distance metric between the observed and simulated data, and tweaked diff --git a/pymc/distributions/timeseries.py b/pymc/distributions/timeseries.py index 80b13f56d..6469cd101 100644 --- a/pymc/distributions/timeseries.py +++ b/pymc/distributions/timeseries.py @@ -60,7 +60,7 @@ class RandomWalkRV(SymbolicRandomVariable): - """RandomWalk Variable""" + """RandomWalk Variable.""" _print_name = ("RandomWalk", "\\operatorname{RandomWalk}") @@ -121,7 +121,7 @@ def rv_op(cls, init_dist, innovation_dist, steps, size=None): class RandomWalk(Distribution): - r"""RandomWalk Distribution + r"""RandomWalk Distribution. TODO: Expand docstrings """ @@ -247,7 +247,7 @@ def random_walk_logp(op, values, *inputs, **kwargs): class PredefinedRandomWalk(ABCMeta): - """Base class for predefined RandomWalk distributions""" + """Base class for predefined RandomWalk distributions.""" def __new__(cls, name, *args, **kwargs): init_dist, innovation_dist, kwargs = cls.get_dists(*args, **kwargs) @@ -309,7 +309,7 @@ def get_dists(cls, mu=0.0, sigma=1.0, *, init_dist=None, **kwargs): class MvGaussianRandomWalk(PredefinedRandomWalk): - r"""Random Walk with Multivariate Normal innovations + r"""Random Walk with Multivariate Normal innovations. Parameters ---------- @@ -361,7 +361,7 @@ def get_dists(cls, mu, *, cov=None, tau=None, chol=None, lower=True, init_dist=N class MvStudentTRandomWalk(PredefinedRandomWalk): - r"""Multivariate Random Walk with StudentT innovations + r"""Multivariate Random Walk with StudentT innovations. Parameters ---------- @@ -630,7 +630,7 @@ def dist( @classmethod def _get_ar_order(cls, rhos: TensorVariable, ar_order: int | None, constant: bool) -> int: - """Compute ar_order given inputs + """Compute ar_order given inputs. If ar_order is not specified we do constant folding on the shape of rhos to retrieve it. For example, this will detect that @@ -774,7 +774,7 @@ def update(self, node: Node): class GARCH11(Distribution): r""" - GARCH(1,1) with Normal innovations. The model is specified by + GARCH(1,1) with Normal innovations. The model is specified by. .. math:: y_t \sim N(0, \sigma_t^2) diff --git a/pymc/distributions/transforms.py b/pymc/distributions/transforms.py index 2c4e121b4..d29bb3402 100644 --- a/pymc/distributions/transforms.py +++ b/pymc/distributions/transforms.py @@ -69,7 +69,7 @@ def __getattr__(name): @singledispatch def _default_transform(op: Op, rv: TensorVariable): - """Return default transform for a given Distribution `Op`""" + """Return default transform for a given Distribution `Op`.""" return None @@ -116,8 +116,9 @@ def log_jac_det(self, value, *inputs): class SumTo1(Transform): """ - Transforms K - 1 dimensional simplex space (k values in [0,1] and that sum to 1) to a K - 1 vector of values in [0,1] - This Transformation operates on the last dimension of the input tensor. + Transforms K - 1 dimensional simplex space (K values in [0, 1] that sum to 1) to a K - 1 vector of values in [0, 1]. + + This transformation operates on the last dimension of the input tensor. """ name = "sumto1" @@ -139,15 +140,12 @@ def log_jac_det(self, value, *inputs): class CholeskyCovPacked(Transform): - """ - Transforms the diagonal elements of the LKJCholeskyCov distribution to be on the - log scale - """ + """Transforms the diagonal elements of the LKJCholeskyCov distribution to be on the log scale.""" name = "cholesky-cov-packed" def __init__(self, n): - """ + """Create a CholeskyCovPack object. 
Parameters ---------- @@ -180,8 +178,7 @@ def log_jac_det(self, value, *inputs): class Interval(IntervalTransform): - """Wrapper around :class:`pymc.logprob.transforms.IntervalTransform` for use in the - ``transform`` argument of a random variable. + """Wrapper around :class:`pymc.logprob.transforms.IntervalTransform` for use in the ``transform`` argument of a random variable. Parameters ---------- diff --git a/pymc/distributions/truncated.py b/pymc/distributions/truncated.py index f0200b736..6f32918bb 100644 --- a/pymc/distributions/truncated.py +++ b/pymc/distributions/truncated.py @@ -51,10 +51,7 @@ class TruncatedRV(SymbolicRandomVariable): - """ - An `Op` constructed from an PyTensor graph - that represents a truncated univariate random variable. - """ + """An `Op` constructed from a PyTensor graph that represents a truncated univariate random variable.""" default_output: int = 0 base_rv_op: Op @@ -232,6 +229,7 @@ def _truncated(op: Op, lower, upper, size, *params): class TruncationCheck(CheckAndRaise): """Implements a check in truncated graphs. + Raises `TruncationError` if the check is not True. """ @@ -239,12 +237,13 @@ def __init__(self, msg=""): super().__init__(TruncationError, msg) def __str__(self): + """Return a string representation of the object.""" return f"TruncationCheck{{{self.msg}}}" class Truncated(Distribution): r""" - Truncated distribution + Truncated distribution. The pdf of a Truncated distribution is diff --git a/pymc/exceptions.py b/pymc/exceptions.py index 7caa2ac3e..f062be527 100644 --- a/pymc/exceptions.py +++ b/pymc/exceptions.py @@ -31,7 +31,7 @@ class IncorrectArgumentsError(ValueError): class TraceDirectoryError(ValueError): - """Error from trying to load a trace from an incorrectly-structured directory,""" + """Error from trying to load a trace from an incorrectly-structured directory.""" pass @@ -77,7 +77,7 @@ def __init__(self, message, actual=None, expected=None): class TruncationError(RuntimeError): - """Exception for errors generated from truncated graphs""" + """Exception for errors generated from truncated graphs.""" class NotConstantValueError(ValueError): diff --git a/pymc/func_utils.py b/pymc/func_utils.py index edcaf5095..21492a34e 100644 --- a/pymc/func_utils.py +++ b/pymc/func_utils.py @@ -37,8 +37,7 @@ def find_constrained_prior( **kwargs, ) -> dict[str, float]: """ - Find optimal parameters to get `mass` % of probability - of a :ref:`distribution ` between `lower` and `upper`. + Find optimal parameters to get `mass` % of probability of a distribution between `lower` and `upper`. Note: only works for one- and two-parameter distributions, as there are exactly two constraints. Fix some combination of parameters diff --git a/pymc/gp/__init__.py b/pymc/gp/__init__.py index 633562d7d..15a49efeb 100644 --- a/pymc/gp/__init__.py +++ b/pymc/gp/__init__.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +"""Gaussian Processes.""" + from pymc.gp import cov, mean, util from pymc.gp.gp import ( TP, diff --git a/pymc/gp/cov.py b/pymc/gp/cov.py index d7f5c6656..22406606b 100644 --- a/pymc/gp/cov.py +++ b/pymc/gp/cov.py @@ -56,9 +56,7 @@ class BaseCovariance: - """ - Base class for kernels/covariance functions. - """ + """Base class for kernels/covariance functions.""" def __call__( self, @@ -116,9 +114,7 @@ def __pow__(self, other) -> "Exponentiated": return Exponentiated(self, other) def __array_wrap__(self, result): - """ - Required to allow radd/rmul by numpy arrays. 
- """ + """Allow radd/rmul by numpy arrays.""" result = np.squeeze(result) if len(result.shape) <= 1: result = result.reshape(1, 1) @@ -152,8 +148,9 @@ def _alloc(X, *shape: int) -> TensorVariable: class Covariance(BaseCovariance): """ - Base class for kernels/covariance functions with input_dim and active_dims, which excludes - kernels like `Constant` and `WhiteNoise`. + Base class for kernels/covariance functions with input_dim and active_dims. + + This excludes kernels like `Constant` and `WhiteNoise`. Parameters ---------- @@ -177,9 +174,7 @@ def __init__(self, input_dim: int, active_dims: IntSequence | None = None): @property def n_dims(self) -> int: - """The dimensionality of the input, as taken from the - `active_dims`. - """ + """The dimensionality of the input, as taken from the `active_dims`.""" # Evaluate lazily in case this changes. return len(self.active_dims) @@ -205,7 +200,6 @@ def _slice(self, X, Xs=None): class Combination(Covariance): def __init__(self, factor_list: Sequence): """Use constituent factors to get input_dim and active_dims for the Combination covariance.""" - # Check if all input_dim are the same in factor_list input_dims = {factor.input_dim for factor in factor_list if isinstance(factor, Covariance)} @@ -239,9 +233,7 @@ def __init__(self, factor_list: Sequence): self._factor_list.append(factor) def _merge_factors_cov(self, X, Xs=None, diag=False): - """Called to evaluate either all the sums or all the - products of kernels that are possible to evaluate. - """ + """Evaluate either all the sums or all the products of kernels that are possible to evaluate.""" factor_list = [] for factor in self._factor_list: # make sure diag=True is handled properly @@ -269,12 +261,12 @@ def _merge_factors_cov(self, X, Xs=None, diag=False): return factor_list def _merge_factors_psd(self, omega): - """Called to evaluatate spectral densities of combination kernels when possible. + """Evaluate spectral densities of combination kernels when possible. - Implements - a more restricted set of rules than `_merge_factors_cov` -- just additivity of stationary - covariances with defined power spectral densities and multiplication by scalars. Also, the - active_dims for all covariances in the sum must be the same. + Implements a more restricted set of rules than `_merge_factors_cov` -- + just additivity of stationary covariances with defined power spectral + densities and multiplication by scalars. Also, the active_dims for all + covariances in the sum must be the same. """ factor_list = [] for factor in self._factor_list: @@ -565,8 +557,9 @@ def power_spectral_density(self, omega: TensorLike) -> TensorVariable: class ExpQuad(Stationary): r""" - The Exponentiated Quadratic kernel. Also referred to as the Squared - Exponential, or Radial Basis Function kernel. + The Exponentiated Quadratic kernel. + + Also referred to as the Squared Exponential, or Radial Basis Function kernel. .. math:: @@ -580,7 +573,7 @@ def full_from_distance(self, dist: TensorLike, squared: bool = False) -> TensorV def power_spectral_density(self, omega: TensorLike) -> TensorVariable: r""" - The power spectral density for the ExpQuad kernel is: + Power spectral density for the ExpQuad kernel. .. math:: @@ -639,7 +632,7 @@ def full_from_distance(self, dist: TensorLike, squared: bool = False) -> TensorV def power_spectral_density(self, omega: TensorLike) -> TensorVariable: r""" - The power spectral density for the Matern52 kernel is: + Power spectral density for the Matern52 kernel. .. 
math:: @@ -678,7 +671,7 @@ def full_from_distance(self, dist: TensorLike, squared: bool = False) -> TensorV def power_spectral_density(self, omega: TensorLike) -> TensorVariable: r""" - The power spectral density for the Matern32 kernel is: + Power spectral density for the Matern32 kernel. .. math:: @@ -703,7 +696,7 @@ def power_spectral_density(self, omega: TensorLike) -> TensorVariable: class Matern12(Stationary): r""" - The Matern kernel with nu = 1/2 + The Matern kernel with nu = 1/2. .. math:: @@ -789,7 +782,8 @@ def full_from_distance(self, dist: TensorLike, squared: bool = False) -> TensorV return pt.exp(-0.5 * r2) def power_spectral_density_approx(self, J: TensorLike) -> TensorVariable: - """ + r"""Power spectral density approximation. + Technically, this is not a spectral density but these are the first `m` coefficients of the low rank approximation for the periodic kernel, which are used in the same way. `J` is a vector of `np.arange(m)`. @@ -865,8 +859,7 @@ def diag(self, X: TensorLike) -> TensorVariable: class WarpedInput(Covariance): r""" - Warp the inputs of any kernel using an arbitrary function - defined using PyTensor. + Warp the inputs of any kernel using an arbitrary function defined using PyTensor. .. math:: k(x, x') = k(w(x), w(x')) @@ -977,8 +970,10 @@ def diag(self, X: TensorLike) -> TensorVariable: class Gibbs(Covariance): r""" - The Gibbs kernel. Use an arbitrary lengthscale function defined - using PyTensor. Only tested in one dimension. + The Gibbs kernel. + + Use an arbitrary lengthscale function defined using PyTensor. + Only tested in one dimension. .. math:: k(x, x') = \sqrt{\frac{2\ell(x)\ell(x')}{\ell^2(x) + \ell^2(x')}} @@ -1044,9 +1039,9 @@ def diag(self, X: TensorLike) -> TensorVariable: class ScaledCov(Covariance): r""" - Construct a kernel by multiplying a base kernel with a scaling - function defined using PyTensor. The scaling function is - non-negative, and can be parameterized. + Construct a kernel by multiplying a base kernel with a scaling function defined using PyTensor. + + The scaling function is non-negative, and can be parameterized. .. math:: k(x, x') = \phi(x) k_{\text{base}}(x, x') \phi(x') @@ -1096,6 +1091,7 @@ def full(self, X: TensorLike, Xs: TensorLike | None = None) -> TensorVariable: class Coregion(Covariance): r"""Covariance function for intrinsic/linear coregionalization models. + Adapted from GPy http://gpy.readthedocs.io/en/deploy/GPy.kern.src.html#GPy.kern.src.coregionalize.Coregionalize. This covariance has the form: diff --git a/pymc/gp/gp.py b/pymc/gp/gp.py index 1d767b53e..e08ebffbe 100644 --- a/pymc/gp/gp.py +++ b/pymc/gp/gp.py @@ -47,8 +47,7 @@ def _handle_sigma_noise_parameters(sigma, noise): - """Helper function for transition of 'noise' parameter to be named 'sigma'.""" - + """Help transition of 'noise' parameter to be named 'sigma'.""" if (sigma is None and noise is None) or (sigma is not None and noise is not None): raise ValueError("'sigma' argument must be specified.") @@ -60,9 +59,7 @@ def _handle_sigma_noise_parameters(sigma, noise): class Base: - R""" - Base class. - """ + """Base class.""" def __init__(self, *, mean_func=Zero(), cov_func=Constant(0.0)): self.mean_func = mean_func @@ -180,8 +177,7 @@ def _build_prior( def prior(self, name, X, n_outputs=1, reparameterize=True, jitter=JITTER_DEFAULT, **kwargs): R""" - Returns the GP prior distribution evaluated over the input - locations `X`. + Return the GP prior distribution evaluated over the input locations `X`. 
This is the prior probability over the space of functions described by its mean and covariance function. @@ -253,8 +249,7 @@ def _build_conditional(self, Xnew, X, f, cov_total, mean_total, jitter): def conditional(self, name, Xnew, given=None, jitter=JITTER_DEFAULT, **kwargs): R""" - Returns the conditional distribution evaluated over new input - locations `Xnew`. + Return the conditional distribution evaluated over new input locations `Xnew`. Given a set of function values `f` that the GP prior was over, the conditional distribution over a @@ -337,6 +332,7 @@ def __init__(self, *, mean_func=Zero(), scale_func=Constant(0.0), cov_func=None, super().__init__(mean_func=mean_func, cov_func=scale_func) def __add__(self, other): + """Add two Student's T processes.""" raise TypeError("Student's T processes aren't additive") def _build_prior(self, name, X, reparameterize=True, jitter=JITTER_DEFAULT, **kwargs): @@ -352,8 +348,7 @@ def _build_prior(self, name, X, reparameterize=True, jitter=JITTER_DEFAULT, **kw def prior(self, name, X, reparameterize=True, jitter=JITTER_DEFAULT, **kwargs): R""" - Returns the TP prior distribution evaluated over the input - locations `X`. + Return the TP prior distribution evaluated over the input locations `X`. This is the prior probability over the space of functions described by its mean and covariance function. @@ -375,7 +370,6 @@ def prior(self, name, X, reparameterize=True, jitter=JITTER_DEFAULT, **kwargs): Extra keyword arguments that are passed to :class:`~pymc.MvStudentT` distribution constructor. """ - f = self._build_prior(name, X, reparameterize, jitter, **kwargs) self.X = X self.f = f @@ -397,8 +391,7 @@ def _build_conditional(self, Xnew, X, f, jitter): def conditional(self, name, Xnew, jitter=JITTER_DEFAULT, **kwargs): R""" - Returns the conditional distribution evaluated over new input - locations `Xnew`. + Return the conditional distribution evaluated over new input locations `Xnew`. Given a set of function values `f` that the TP prior was over, the conditional distribution over a @@ -418,7 +411,6 @@ def conditional(self, name, Xnew, jitter=JITTER_DEFAULT, **kwargs): Extra keyword arguments that are passed to :class:`~pymc.MvStudentT` distribution constructor. """ - X = self.X f = self.f nu2, mu, cov = self._build_conditional(Xnew, X, f, jitter) @@ -491,8 +483,7 @@ def marginal_likelihood( **kwargs, ): R""" - Returns the marginal likelihood distribution, given the input - locations `X` and the data `y`. + Return the marginal likelihood distribution, given the input locations `X` and the data `y`. This is the integral over the product of the GP prior and a normal likelihood. @@ -598,8 +589,7 @@ def conditional( self, name, Xnew, pred_noise=False, given=None, jitter=JITTER_DEFAULT, **kwargs ): R""" - Returns the conditional distribution evaluated over new input - locations `Xnew`. + Return the conditional distribution evaluated over new input locations `Xnew`. Given a set of function values `f` that the GP prior was over, the conditional distribution over a set of new points, `f_*` is: @@ -630,7 +620,6 @@ def conditional( Extra keyword arguments that are passed to :class:`~pymc.MvNormal` distribution constructor. 
""" - givens = self._get_given_vals(given) mu, cov = self._build_conditional(Xnew, pred_noise, False, *givens, jitter) return pm.MvNormal(name, mu=mu, cov=cov, **kwargs) @@ -646,9 +635,9 @@ def predict( model=None, ): R""" - Return the mean vector and covariance matrix of the conditional - distribution as numpy arrays, given a `point`, such as the MAP - estimate or a sample from a `trace`. + Return mean and covariance of the conditional distribution given a `point`. + + The `point` might be the MAP estimate or a sample from a trace. Parameters ---------- @@ -681,8 +670,7 @@ def predict( def _predict_at(self, Xnew, diag=False, pred_noise=False, given=None, jitter=JITTER_DEFAULT): R""" - Return the mean vector and covariance matrix of the conditional - distribution as symbolic variables. + Return symbolic mean and covariance of the conditional distribution. Parameters ---------- @@ -779,6 +767,7 @@ def __init__(self, approx="VFE", *, mean_func=Zero(), cov_func=Constant(0.0)): super().__init__(mean_func=mean_func, cov_func=cov_func) def __add__(self, other): + """Add two Gaussian processes.""" new_gp = super().__add__(other) if not self.approx == other.approx: raise TypeError("Cannot add GPs with different approximations") @@ -818,9 +807,10 @@ def marginal_likelihood( self, name, X, Xu, y, sigma=None, noise=None, jitter=JITTER_DEFAULT, **kwargs ): R""" - Returns the approximate marginal likelihood distribution, given the input - locations `X`, inducing point locations `Xu`, data `y`, and white noise - standard deviations `sigma`. + Return the approximate marginal likelihood distribution. + + This is given the input locations `X`, inducing point locations `Xu`, + data `y`, and white noise standard deviations `sigma`. Parameters ---------- @@ -845,7 +835,6 @@ def marginal_likelihood( Extra keyword arguments that are passed to :class:`~pymc.MvNormal` distribution constructor. """ - self.X = X self.Xu = Xu self.y = y @@ -911,8 +900,7 @@ def conditional( self, name, Xnew, pred_noise=False, given=None, jitter=JITTER_DEFAULT, **kwargs ): R""" - Returns the approximate conditional distribution of the GP evaluated over - new input locations `Xnew`. + Return the approximate conditional distribution of the GP evaluated over new input locations `Xnew`. Parameters ---------- @@ -934,7 +922,6 @@ def conditional( Extra keyword arguments that are passed to :class:`~pymc.MvNormal` distribution constructor. """ - givens = self._get_given_vals(given) mu, cov = self._build_conditional(Xnew, pred_noise, False, *givens, jitter) return pm.MvNormal(name, mu=mu, cov=cov, **kwargs) @@ -1012,6 +999,7 @@ def __init__(self, *, mean_func=Zero(), cov_funcs=(Constant(0.0))): super().__init__(mean_func=mean_func, cov_func=cov_func) def __add__(self, other): + """Add two Gaussian processes.""" raise TypeError("Additive, Kronecker-structured processes not implemented") def _build_prior(self, name, Xs, jitter, **kwargs): @@ -1024,8 +1012,7 @@ def _build_prior(self, name, Xs, jitter, **kwargs): def prior(self, name, Xs, jitter=JITTER_DEFAULT, **kwargs): """ - Returns the prior distribution evaluated over the input - locations `Xs`. + Return the prior distribution evaluated over the input locations `Xs`. Parameters ---------- @@ -1070,8 +1057,7 @@ def _build_conditional(self, Xnew, jitter): def conditional(self, name, Xnew, jitter=JITTER_DEFAULT, **kwargs): """ - Returns the conditional distribution evaluated over new input - locations `Xnew`. + Return the conditional distribution evaluated over new input locations `Xnew`. 
`Xnew` will be split by columns and fed to the relevant covariance functions based on their `input_dim`. For example, if @@ -1173,6 +1159,7 @@ def __init__(self, *, mean_func=Zero(), cov_funcs=(Constant(0.0))): super().__init__(mean_func=mean_func, cov_func=cov_func) def __add__(self, other): + """Add two Gaussian processes.""" raise TypeError("Additive, Kronecker-structured processes not implemented") def _build_marginal_likelihood(self, Xs): @@ -1192,8 +1179,7 @@ def _check_inputs(self, Xs, y): def marginal_likelihood(self, name, Xs, y, sigma, is_observed=True, **kwargs): """ - Returns the marginal likelihood distribution, given the input - locations `cartesian(*Xs)` and the data `y`. + Return the marginal likelihood distribution, given the input locations `cartesian(*Xs)` and the data `y`. Parameters ---------- @@ -1271,8 +1257,7 @@ def _build_conditional(self, Xnew, diag, pred_noise): def conditional(self, name, Xnew, pred_noise=False, diag=False, **kwargs): """ - Returns the conditional distribution evaluated over new input - locations `Xnew`, just as in `Marginal`. + Return the conditional distribution evaluated over new input locations `Xnew`, just as in `Marginal`. `Xnew` will be split by columns and fed to the relevant covariance functions based on their `input_dim`. For example, if @@ -1307,9 +1292,9 @@ def conditional(self, name, Xnew, pred_noise=False, diag=False, **kwargs): def predict(self, Xnew, point=None, diag=False, pred_noise=False, model=None): R""" - Return the mean vector and covariance matrix of the conditional - distribution as numpy arrays, given a `point`, such as the MAP - estimate or a sample from a `trace`. + Return mean and covariance of the conditional distribution given a `point`. + + The `point` might be the MAP estimate or a sample from a trace. Parameters ---------- @@ -1333,8 +1318,7 @@ def predict(self, Xnew, point=None, diag=False, pred_noise=False, model=None): def _predict_at(self, Xnew, diag=False, pred_noise=False): R""" - Return the mean vector and covariance matrix of the conditional - distribution as symbolic variables. + Return symbolic mean and covariance of the conditional distribution. Parameters ---------- diff --git a/pymc/gp/hsgp_approx.py b/pymc/gp/hsgp_approx.py index f1adf331d..73e80dfd6 100644 --- a/pymc/gp/hsgp_approx.py +++ b/pymc/gp/hsgp_approx.py @@ -31,8 +31,10 @@ def set_boundary(X: TensorLike, c: numbers.Real | TensorLike) -> np.ndarray: - """Set the boundary using `X` and `c`. `X` can be centered around zero but doesn't have to be, - and `c` is usually a scalar multiplier greater than 1.0, but it may also be one value per + """Set the boundary using `X` and `c`. + + `X` can be centered around zero but doesn't have to be, and `c` is usually + a scalar multiplier greater than 1.0, but it may also be one value per dimension or column of `X`. """ # compute radius. Works whether X is 0-centered or not @@ -44,7 +46,6 @@ def set_boundary(X: TensorLike, c: numbers.Real | TensorLike) -> np.ndarray: def calc_eigenvalues(L: TensorLike, m: Sequence[int]): """Calculate eigenvalues of the Laplacian.""" - S = np.meshgrid(*[np.arange(1, 1 + m[d]) for d in range(len(m))]) S_arr = np.vstack([s.flatten() for s in S]).T @@ -57,8 +58,9 @@ def calc_eigenvectors( eigvals: TensorLike, m: Sequence[int], ): - """Calculate eigenvectors of the Laplacian. These are used as basis vectors in the HSGP - approximation. + """Calculate eigenvectors of the Laplacian. + + These are used as basis vectors in the HSGP approximation. 
""" m_star = int(np.prod(m)) @@ -80,6 +82,7 @@ def calc_basis_periodic( ): """ Calculate basis vectors for the cosine series expansion of the periodic covariance function. + These are derived from the Taylor series representation of the covariance. """ w0 = (2 * np.pi) / period # angular frequency defining the periodicity @@ -94,8 +97,7 @@ def calc_basis_periodic( def approx_hsgp_hyperparams( x_range: list[float], lengthscale_range: list[float], cov_func: str ) -> tuple[int, float]: - """Utility function that uses heuristics to recommend minimum `m` and `c` values, - based on recommendations from Ruitort-Mayol et. al. + """Use heuristics to recommend minimum `m` and `c` values, based on recommendations from Ruitort-Mayol et. al. In practice, you need to choose `c` large enough to handle the largest lengthscales, and `m` large enough to accommodate the smallest lengthscales. Use your prior on the @@ -311,6 +313,7 @@ def __init__( super().__init__(mean_func=mean_func, cov_func=cov_func) def __add__(self, other): + """Add two HSGPs.""" raise NotImplementedError("Additive HSGPs aren't supported.") @property @@ -324,14 +327,18 @@ def L(self, value: TensorLike): self._L = pt.as_tensor_variable(value) def prior_linearized(self, X: TensorLike): - """Linearized version of the HSGP. Returns the Laplace eigenfunctions and the square root + """Linearized version of the HSGP. + + Returns the Laplace eigenfunctions and the square root of the power spectral density needed to create the GP. - This function allows the user to bypass the GP interface and work with the basis - and coefficients directly. This format allows the user to create predictions using - `pm.set_data` similarly to a linear model. It also enables computational speed ups in - multi-GP models, since they may share the same basis. The return values are the Laplace - eigenfunctions `phi`, and the square root of the power spectral density. + This function allows the user to bypass the GP interface and work with + the basis and coefficients directly. This format allows the user to + create predictions using `pm.set_data` similarly to a linear model. It + also enables computational speed ups in multi-GP models, since they may + share the same basis. The return values are the Laplace eigenfunctions + `phi`, and the square root of the power spectral density. + An example is given below. Parameters @@ -427,7 +434,8 @@ def prior( **kwargs, ): # type: ignore R""" - Returns the (approximate) GP prior distribution evaluated over the input locations `X`. + Return the (approximate) GP prior distribution evaluated over the input locations `X`. + For usage examples, refer to `pm.gp.Latent`. Parameters @@ -490,8 +498,7 @@ def _build_conditional(self, Xnew): def conditional(self, name: str, Xnew: TensorLike, dims: str | None = None): # type: ignore R""" - Returns the (approximate) conditional distribution evaluated over new input locations - `Xnew`. + Return the (approximate) conditional distribution evaluated over new input locations `Xnew`. Parameters ---------- @@ -600,16 +607,21 @@ def __init__( super().__init__(mean_func=mean_func, cov_func=cov_func) def prior_linearized(self, X: TensorLike): - """Linearized version of the approximation. Returns the cosine and sine bases and coefficients + """Linearized version of the approximation. + + Returns the cosine and sine bases and coefficients of the expansion needed to create the GP. - This function allows the user to bypass the GP interface and work directly with the basis - and coefficients directly. 
This format allows the user to create predictions using - `pm.set_data` similarly to a linear model. It also enables computational speed ups in - multi-GP models since they may share the same basis. + This function allows the user to bypass the GP interface and work + directly with the basis and coefficients. This format allows + the user to create predictions using `pm.set_data` similarly to a linear + model. It also enables computational speed ups in multi-GP models since + they may share the same basis. + + Correct results when using `prior_linearized` in tandem with + `pm.set_data` and `pm.MutableData` require that the `Xs` are + zero-centered, so its mean must be subtracted. - Correct results when using `prior_linearized` in tandem with `pm.set_data` and - `pm.MutableData` require that the `Xs` are zero-centered, so it's mean must be subtracted. An example is given below. Parameters ---------- @@ -685,7 +697,8 @@ def prior_linearized(self, X: TensorLike): def prior(self, name: str, X: TensorLike, dims: str | None = None): # type: ignore R""" - Returns the (approximate) GP prior distribution evaluated over the input locations `X`. + Return the (approximate) GP prior distribution evaluated over the input locations `X`. + For usage examples, refer to `pm.gp.Latent`. Parameters ---------- @@ -736,8 +749,7 @@ def _build_conditional(self, Xnew): def conditional(self, name: str, Xnew: TensorLike, dims: str | None = None): # type: ignore R""" - Returns the (approximate) conditional distribution evaluated over new input locations - `Xnew`. + Return the (approximate) conditional distribution evaluated over new input locations `Xnew`. Parameters ---------- diff --git a/pymc/gp/mean.py b/pymc/gp/mean.py index 30a6fe244..800cbf556 100644 --- a/pymc/gp/mean.py +++ b/pymc/gp/mean.py @@ -18,9 +18,7 @@ class Mean: - R""" - Base class for mean functions - """ + """Base class for mean functions.""" def __call__(self, X): R""" @@ -40,17 +38,14 @@ def __mul__(self, other): class Zero(Mean): - R""" - Zero mean function for Gaussian process. - - """ + """Zero mean function for Gaussian process.""" def __call__(self, X): return pt.alloc(0.0, X.shape[0]) class Constant(Mean): - R""" + """ Constant mean function for Gaussian process. Parameters ---------- @@ -68,7 +63,7 @@ def __call__(self, X): class Linear(Mean): - R""" + """ Linear mean function for Gaussian process. Parameters ---------- diff --git a/pymc/gp/util.py b/pymc/gp/util.py index 734d36ed3..b2d7447b1 100644 --- a/pymc/gp/util.py +++ b/pymc/gp/util.py @@ -31,6 +31,7 @@ def replace_with_values(vars_needed, replacements=None, model=None): R""" Replace random variable nodes in the graph with values given by the replacements dict. + Uses untransformed versions of the inputs, performs some basic input validation. Parameters ---------- @@ -76,7 +77,7 @@ def replace_with_values(vars_needed, replacements=None, model=None): def stabilize(K, jitter=JITTER_DEFAULT): R""" - Adds small diagonal to a covariance matrix. + Add small diagonal to a covariance matrix. Often the matrices calculated from covariance functions, `K = cov_func(X)` do not appear numerically to be positive semi-definite. Adding a small @@ -94,8 +95,7 @@ def stabilize(K, jitter=JITTER_DEFAULT): def kmeans_inducing_points(n_inducing, X, **kmeans_kwargs): R""" - Use the K-means algorithm to initialize the locations `X` for the inducing - points `fu`. + Use the K-means algorithm to initialize the locations `X` for the inducing points `fu`.
Parameters ---------- @@ -131,7 +131,7 @@ def kmeans_inducing_points(n_inducing, X, **kmeans_kwargs): def conditioned_vars(varnames): - """Decorator for validating attrs that are conditioned on.""" + """Validate attrs that are conditioned on.""" def gp_wrapper(cls): def make_getter(name): @@ -174,7 +174,7 @@ def plot_gp_dist( fill_kwargs=None, samples_kwargs=None, ): - """A helper function for plotting 1D GP posteriors from trace + """Plot 1D GP posteriors from trace. Parameters ---------- diff --git a/pymc/initial_point.py b/pymc/initial_point.py index 2e06f51f5..15f4f887c 100644 --- a/pymc/initial_point.py +++ b/pymc/initial_point.py @@ -35,9 +35,12 @@ def convert_str_to_rv_dict( model, start: StartDict ) -> dict[TensorVariable, np.ndarray | Variable | str | None]: - """Helper function for converting a user-provided start dict with str keys of (transformed) variable names + """Convert a user-provided start dict to an untransformed RV start dict. + + Converts a dict of str keys of (transformed) variable names to a dict mapping the RV tensors to untransformed initvals. - TODO: Deprecate this functionality and only accept TensorVariables as keys + + TODO: Deprecate this functionality and only accept TensorVariables as keys. """ initvals = {} for key, initval in start.items(): @@ -59,7 +62,7 @@ def make_initial_point_fns_per_chain( jitter_rvs: set[TensorVariable] | None = None, chains: int, ) -> list[Callable]: - """Create an initial point function for each chain, as defined by initvals + """Create an initial point function for each chain, as defined by initvals. If a single initval dictionary is passed, the function is replicated for each chain, otherwise a unique function is compiled for each entry in the dictionary. @@ -130,7 +133,6 @@ def make_initial_point_fn( return_transformed : bool If `True` the returned variables will correspond to transformed initial values. """ - sdict_overrides = convert_str_to_rv_dict(model, overrides or {}) initval_strats = { **model.rvs_to_initial_values, @@ -183,7 +185,7 @@ def make_initial_point_expression( default_strategy: str = "support_point", return_transformed: bool = False, ) -> list[TensorVariable]: - """Creates the tensor variables that need to be evaluated to obtain an initial point. + """Create the tensor variables that need to be evaluated to obtain an initial point. Parameters ---------- diff --git a/pymc/logprob/__init__.py b/pymc/logprob/__init__.py index bed9ee3a9..aaa8b2052 100644 --- a/pymc/logprob/__init__.py +++ b/pymc/logprob/__init__.py @@ -34,6 +34,8 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
+"""Conversion of PyMC graphs into logp graphs.""" + from pymc.logprob.basic import ( conditional_logp, icdf, diff --git a/pymc/logprob/abstract.py b/pymc/logprob/abstract.py index 097bf84af..f47c39e2b 100644 --- a/pymc/logprob/abstract.py +++ b/pymc/logprob/abstract.py @@ -76,7 +76,7 @@ def _logprob( def _logprob_helper(rv, *values, **kwargs): - """Helper that calls `_logprob` dispatcher.""" + """Help call `_logprob` dispatcher.""" logprob = _logprob(rv.owner.op, values, *rv.owner.inputs, **kwargs) name = rv.name @@ -109,7 +109,7 @@ def _logcdf( def _logcdf_helper(rv, value, **kwargs): - """Helper that calls `_logcdf` dispatcher.""" + """Help call `_logcdf` dispatcher.""" logcdf = _logcdf(rv.owner.op, value, *rv.owner.inputs, name=rv.name, **kwargs) if rv.name: @@ -134,7 +134,7 @@ def _icdf( def _icdf_helper(rv, value, **kwargs): - """Helper that calls `_icdf` dispatcher.""" + """Help call `_icdf` dispatcher.""" rv_icdf = _icdf(rv.owner.op, value, *rv.owner.inputs, **kwargs) if rv.name: @@ -144,14 +144,14 @@ def _icdf_helper(rv, value, **kwargs): class MeasurableOp(abc.ABC): - """An operation whose outputs can be assigned a measure/log-probability""" + """An operation whose outputs can be assigned a measure/log-probability.""" MeasurableOp.register(RandomVariable) class MeasurableElemwise(MeasurableOp, Elemwise): - """Base class for Measurable Elemwise variables""" + """Base class for Measurable Elemwise variables.""" valid_scalar_types: tuple[MetaType, ...] = () @@ -164,6 +164,7 @@ def __init__(self, scalar_op, *args, **kwargs): super().__init__(scalar_op, *args, **kwargs) def __str__(self): + """Return a string representation of the object.""" return f"Measurable{super().__str__()}" diff --git a/pymc/logprob/basic.py b/pymc/logprob/basic.py index d8188f264..2e62660cb 100644 --- a/pymc/logprob/basic.py +++ b/pymc/logprob/basic.py @@ -87,7 +87,6 @@ def _warn_rvs_in_inferred_graph(graph: TensorVariable | Sequence[TensorVariable] This makes it impossible (or difficult) to replace it by the respective values afterward, so we instruct users to do it beforehand. """ - rvs_in_graph = _find_unallowed_rvs_in_graph(graph) if rvs_in_graph: warnings.warn( @@ -415,8 +414,7 @@ def conditional_logp( extra_rewrites: GraphRewriter | NodeRewriter | None = None, **kwargs, ) -> dict[TensorVariable, TensorVariable]: - r"""Create a map between variables and conditional log-probabilities - such that the sum is their joint log-probability. + r"""Create a map between variables and conditional logps such that the sum is their joint logp. The `rv_values` dictionary specifies a joint probability graph defined by pairs of random variables and respective measure-space input parameters @@ -583,7 +581,6 @@ def transformed_conditional_logp( This helper will only return the subset of logprob terms corresponding to `rvs`. All rvs_to_values and rvs_to_transforms mappings are required. """ - transform_rewrite = None values_to_transforms = { rvs_to_values[rv]: transform diff --git a/pymc/logprob/censoring.py b/pymc/logprob/censoring.py index 248c285ba..2104ecb6e 100644 --- a/pymc/logprob/censoring.py +++ b/pymc/logprob/censoring.py @@ -90,7 +90,7 @@ def find_measurable_clips(fgraph: FunctionGraph, node: Node) -> list[TensorVaria @_logprob.register(MeasurableClip) def clip_logprob(op, values, base_rv, lower_bound, upper_bound, **kwargs): - r"""Logprob of a clipped censored distribution + r"""Logprob of a clipped censored distribution. The probability is given by .. 
math:: @@ -174,7 +174,7 @@ def find_measurable_roundings(fgraph: FunctionGraph, node: Node) -> list[TensorV @_logprob.register(MeasurableRound) def round_logprob(op, values, base_rv, **kwargs): - r"""Logprob of a rounded censored distribution + r"""Logprob of a rounded censored distribution. The probability of a distribution rounded to the nearest integer is given by .. math:: diff --git a/pymc/logprob/checks.py b/pymc/logprob/checks.py index c9c60bb0f..c8c21ef61 100644 --- a/pymc/logprob/checks.py +++ b/pymc/logprob/checks.py @@ -61,8 +61,7 @@ def logprob_specify_shape(op, values, inner_rv, *shapes, **kwargs): @node_rewriter([SpecifyShape]) def find_measurable_specify_shapes(fgraph, node) -> list[TensorVariable] | None: - r"""Finds `SpecifyShapeOp`\s for which a `logprob` can be computed.""" - + r"""Find `SpecifyShapeOp`\s for which a `logprob` can be computed.""" if isinstance(node.op, MeasurableSpecifyShape): return None # pragma: no cover @@ -99,8 +98,7 @@ def logprob_check_and_raise(op, values, inner_rv, *assertions, **kwargs): @node_rewriter([CheckAndRaise]) def find_measurable_check_and_raise(fgraph, node) -> list[TensorVariable] | None: - r"""Finds `AssertOp`\s for which a `logprob` can be computed.""" - + r"""Find `AssertOp`\s for which a `logprob` can be computed.""" if isinstance(node.op, MeasurableCheckAndRaise): return None # pragma: no cover diff --git a/pymc/logprob/cumsum.py b/pymc/logprob/cumsum.py index af7f73888..4fd5a6eae 100644 --- a/pymc/logprob/cumsum.py +++ b/pymc/logprob/cumsum.py @@ -76,8 +76,7 @@ def logprob_cumsum(op, values, base_rv, **kwargs): @node_rewriter([CumOp]) def find_measurable_cumsums(fgraph, node) -> list[TensorVariable] | None: - r"""Finds `Cumsums`\s for which a `logprob` can be computed.""" - + r"""Find `Cumsums`\s for which a `logprob` can be computed.""" if not (isinstance(node.op, CumOp) and node.op.mode == "add"): return None diff --git a/pymc/logprob/mixture.py b/pymc/logprob/mixture.py index 1fd4b1156..55e506ad9 100644 --- a/pymc/logprob/mixture.py +++ b/pymc/logprob/mixture.py @@ -244,7 +244,6 @@ def get_stack_mixture_vars( node: Apply, ) -> tuple[list[TensorVariable] | None, int | None]: r"""Extract the mixture terms from a `*Subtensor*` applied to stacked `MeasurableVariable`\s.""" - assert isinstance(node.op, subtensor_ops) joined_rvs = node.inputs[0] diff --git a/pymc/logprob/order.py b/pymc/logprob/order.py index 51833a128..6eceb819d 100644 --- a/pymc/logprob/order.py +++ b/pymc/logprob/order.py @@ -61,7 +61,7 @@ class MeasurableMax(MeasurableOp, Max): class MeasurableMaxDiscrete(MeasurableOp, Max): - """A placeholder used to specify a log-likelihood for sub-graphs of maxima of discrete variables""" + """A placeholder used to specify a log-likelihood for sub-graphs of maxima of discrete variables.""" @node_rewriter([Max]) diff --git a/pymc/logprob/rewriting.py b/pymc/logprob/rewriting.py index 08373ed7f..cd390e13a 100644 --- a/pymc/logprob/rewriting.py +++ b/pymc/logprob/rewriting.py @@ -106,7 +106,6 @@ def remove_promised_valued_rvs(outputs): @node_rewriter((Elemwise, Alloc, DimShuffle, *subtensor_ops)) def local_lift_DiracDelta(fgraph, node): r"""Lift basic `Op`\s through `DiracDelta`\s.""" - if len(node.outputs) > 1: return @@ -200,7 +199,7 @@ def construct_ir_fgraph( A custom IR rewriter can be specified. By default, `logprob_rewrites_db.query(RewriteDatabaseQuery(include=["basic"]))` is used. 
- Our measurable IR takes the form of an PyTensor graph that is more-or-less + Our measurable IR takes the form of a PyTensor graph that is more-or-less equivalent to a given PyTensor graph (i.e. the keys of `rv_values`) but contains `Op`s that are subclasses of the `MeasurableOp` type in place of ones that do not inherit from `MeasurableOp` in the original @@ -223,7 +222,6 @@ def construct_ir_fgraph( ------- A `FunctionGraph` of the measurable IR. """ - # We add `ShapeFeature` because it will get rid of references to the old # `RandomVariable`s that have been lifted; otherwise, it will be difficult # to give good warnings when an unaccounted for `RandomVariable` is encountered diff --git a/pymc/logprob/scan.py b/pymc/logprob/scan.py index 4b643b730..ecd04b9c7 100644 --- a/pymc/logprob/scan.py +++ b/pymc/logprob/scan.py @@ -68,6 +68,7 @@ class MeasurableScan(MeasurableOp, Scan): """A placeholder used to specify a log-likelihood for a scan sub-graph.""" def __str__(self): + """Return a string representation of the object.""" return f"Measurable{super().__str__()}" @@ -100,7 +101,6 @@ def convert_outer_out_to_in( A `ScanArgs` object for a `Scan` in which `outer_out_vars` has been converted to an outer-graph input. """ - output_scan_args = copy(input_scan_args) inner_outs_to_new_inner_ins = {} @@ -396,7 +396,6 @@ def create_inner_out_logp(value_map: dict[TensorVariable, TensorVariable]) -> Te @node_rewriter([Scan, Subtensor]) def find_measurable_scans(fgraph, node): r"""Find `Scan`\s for which a `logprob` can be computed.""" - if isinstance(node.op, Subtensor): node = node.inputs[0].owner if not (node and isinstance(node.op, Scan)): diff --git a/pymc/logprob/tensor.py b/pymc/logprob/tensor.py index 750ace569..0f4624ca3 100644 --- a/pymc/logprob/tensor.py +++ b/pymc/logprob/tensor.py @@ -140,7 +140,7 @@ def logprob_join(op, values, axis, *base_rvs, **kwargs): @node_rewriter([MakeVector, Join]) def find_measurable_stacks(fgraph, node) -> list[TensorVariable] | None: - r"""Finds `Joins`\s and `MakeVector`\s for which a `logprob` can be computed.""" + r"""Find `Joins`\s and `MakeVector`\s for which a `logprob` can be computed.""" from pymc.pytensorf import toposort_replace if isinstance(node.op, MeasurableOp): @@ -218,7 +218,7 @@ def logprob_dimshuffle(op: MeasurableDimShuffle, values, base_var, **kwargs): @node_rewriter([DimShuffle]) def find_measurable_dimshuffles(fgraph, node) -> list[TensorVariable] | None: - r"""Finds `Dimshuffle`\s for which a `logprob` can be computed.""" + r"""Find `Dimshuffle`\s for which a `logprob` can be computed.""" from pymc.distributions.distribution import SymbolicRandomVariable if isinstance(node.op, MeasurableOp): diff --git a/pymc/logprob/transform_value.py b/pymc/logprob/transform_value.py index fa013dbf3..f093ddbf2 100644 --- a/pymc/logprob/transform_value.py +++ b/pymc/logprob/transform_value.py @@ -139,7 +139,6 @@ def transform_values(fgraph: FunctionGraph, node: Apply) -> list[Apply] | None: variable is specified on the log scale and back-transform it to obtain ``Y`` on the natural scale. """ - values_to_transforms: TransformValuesMapping | None = getattr( fgraph, "values_to_transforms", None ) @@ -210,7 +209,8 @@ def __init__( self, values_to_transforms: dict[TensorVariable, Transform | None], ): - """ + """Create the rewriter. + Parameters ---------- values_to_transforms @@ -220,7 +220,6 @@ def __init__( not be transformed. 
""" - self.values_to_transforms = values_to_transforms def add_requirements(self, fgraph): diff --git a/pymc/logprob/transforms.py b/pymc/logprob/transforms.py index d6dd0894b..41233223b 100644 --- a/pymc/logprob/transforms.py +++ b/pymc/logprob/transforms.py @@ -136,8 +136,10 @@ def forward(self, value: TensorVariable, *inputs: Variable) -> TensorVariable: def backward( self, value: TensorVariable, *inputs: Variable ) -> TensorVariable | tuple[TensorVariable, ...]: - """Invert the transformation. Multiple values may be returned when the - transformation is not 1-to-1""" + """Invert the transformation. + + Multiple values may be returned when the transformation is not 1-to-1. + """ def log_jac_det(self, value: TensorVariable, *inputs) -> TensorVariable: """Construct the log of the absolute value of the Jacobian determinant.""" @@ -153,11 +155,12 @@ def log_jac_det(self, value: TensorVariable, *inputs) -> TensorVariable: return pt.log(pt.abs(pt.nlinalg.det(pt.atleast_2d(jacobian(phi_inv, [value])[0])))) def __str__(self): + """Return a string representation of the object.""" return f"{self.__class__.__name__}" class MeasurableTransform(MeasurableElemwise): - """A placeholder used to specify a log-likelihood for a transformed measurable variable""" + """A placeholder used to specify a log-likelihood for a transformed measurable variable.""" valid_scalar_types = ( Exp, @@ -370,7 +373,7 @@ def measurable_neg_to_product(fgraph, node): @node_rewriter([sub]) def measurable_sub_to_neg(fgraph, node): - """Convert subtraction involving `MeasurableVariable`s to addition with neg""" + """Convert subtraction involving `MeasurableVariable`s to addition with neg.""" if not filter_measurable_variables(node.inputs): return None @@ -452,7 +455,6 @@ def measurable_power_exponent_to_exp(fgraph, node): ) def find_measurable_transforms(fgraph: FunctionGraph, node: Node) -> list[Node] | None: """Find measurable transformations from Elemwise operators.""" - # Node was already converted if isinstance(node.op, MeasurableOp): return None @@ -846,7 +848,7 @@ class IntervalTransform(Transform): name = "interval" def __init__(self, args_fn: Callable[..., tuple[Variable | None, Variable | None]]): - """ + """Create the IntervalTransform object. Parameters ---------- diff --git a/pymc/logprob/utils.py b/pymc/logprob/utils.py index adc75b556..e96426fbe 100644 --- a/pymc/logprob/utils.py +++ b/pymc/logprob/utils.py @@ -80,7 +80,6 @@ def replace_rvs_by_values( rvs_to_transforms, optional Mapping between the original graph RVs and respective value transforms """ - if rvs_to_transforms: # Conditional transforms like Interval can reference variables in the original RV graph # To avoid mutating the original graphs in place, we have to clone them @@ -200,7 +199,7 @@ def expand_fn(var): class ParameterValueError(ValueError): - """Exception for invalid parameters values in logprob graphs""" + """Exception for invalid parameters values in logprob graphs.""" class CheckParameterValue(CheckAndRaise): @@ -216,12 +215,13 @@ def __init__(self, msg: str = "", can_be_replaced_by_ninf: bool = False): self.can_be_replaced_by_ninf = can_be_replaced_by_ninf def __str__(self): + """Return a string representation of the object.""" return f"Check{{{self.msg}}}" @node_rewriter(tracks=[CheckParameterValue]) def local_remove_check_parameter(fgraph, node): - """Rewrite that removes CheckParameterValue + """Rewrite that removes CheckParameterValue. 
This is used when compile_rv_inplace """ @@ -302,7 +302,6 @@ def diracdelta_logprob(op, values, *inputs, **kwargs): def find_negated_var(var): """Return a variable that is being multiplied by -1 or None otherwise.""" - if not ( var.owner and isinstance(var.owner.op, Elemwise) and isinstance(var.owner.op.scalar_op, Mul) ): diff --git a/pymc/math.py b/pymc/math.py index b5fc50a8e..3aba39314 100644 --- a/pymc/math.py +++ b/pymc/math.py @@ -184,8 +184,9 @@ def kronecker(*Ks): - r"""Return the Kronecker product of arguments: - :math:`K_1 \otimes K_2 \otimes ... \otimes K_D` + r"""Return the Kronecker product of arguments. + + :math:`K_1 \otimes K_2 \otimes ... \otimes K_D` Parameters ---------- @@ -201,7 +202,7 @@ def kronecker(*Ks): def cartesian(*arrays): - """Makes the Cartesian product of arrays. + """Make the Cartesian product of arrays. Parameters ---------- @@ -219,7 +220,7 @@ def cartesian(*arrays): def kron_matrix_op(krons, m, op): - r"""Apply op to krons and m in a way that reproduces ``op(kronecker(*krons), m)`` + r"""Apply op to krons and m in a way that reproduces ``op(kronecker(*krons), m)``. Parameters ---------- @@ -264,7 +265,7 @@ def flat_outer(a, b): def kron_diag(*diags): - """Returns diagonal of a kronecker product. + """Return diagonal of a kronecker product. Parameters ---------- @@ -275,12 +276,12 @@ def kron_diag(*diags): def logdiffexp(a, b): - """log(exp(a) - exp(b))""" + """Return log(exp(a) - exp(b)).""" return a + pt.log1mexp(b - a) def logdiffexp_numpy(a, b): - """log(exp(a) - exp(b))""" + """Return log(exp(a) - exp(b)).""" warnings.warn( "pymc.math.logdiffexp_numpy is being deprecated.", FutureWarning, @@ -330,7 +331,9 @@ def log1mexp(x, *, negative_input=False): def log1mexp_numpy(x, *, negative_input=False): """Return log(1 - exp(x)). + This function is numerically more stable than the naive approach. + For details, see https://cran.r-project.org/web/packages/Rmpfr/vignettes/log1mexp-note.pdf """ @@ -363,9 +366,9 @@ def flatten_list(tensors): class LogDet(Op): - r"""Compute the logarithm of the absolute determinant of a square - matrix M, log(abs(det(M))) on the CPU. Avoids det(M) overflow/ - underflow. + r"""Compute the logarithm of the absolute determinant of a square matrix M, log(abs(det(M))) on the CPU. + + Avoids det(M) overflow/underflow. Notes ----- @@ -460,9 +463,7 @@ def expand_packed_triangular(n, packed, lower=True, diagonal_only=False): class BatchedDiag(Op): - """ - Fast BatchedDiag allocation - """ + """Fast BatchedDiag allocation.""" __props__ = () @@ -509,8 +510,7 @@ def batched_diag(C): def block_diagonal(matrices, sparse=False, format="csr"): - r"""See pt.slinalg.block_diag or - pytensor.sparse.basic.block_diag for reference + r"""See pt.slinalg.block_diag or pytensor.sparse.basic.block_diag for reference. Parameters ---------- diff --git a/pymc/model/__init__.py b/pymc/model/__init__.py index d6316898a..4caa70137 100644 --- a/pymc/model/__init__.py +++ b/pymc/model/__init__.py @@ -11,5 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + +"""Model object.""" + from pymc.model.core import * from pymc.model.core import ValueGradFunction diff --git a/pymc/model/core.py b/pymc/model/core.py index 41d4e0864..48d2117eb 100644 --- a/pymc/model/core.py +++ b/pymc/model/core.py @@ -96,9 +96,7 @@ class ContextMeta(type): - """Functionality for objects that put themselves in a context using - the `with` statement.
- """ + """Functionality for objects that put themselves in a context manager.""" def __new__(cls, name, bases, dct, **kwargs): """Add __enter__ and __exit__ methods to the class.""" @@ -128,9 +126,10 @@ def __init__(cls, name, bases, nmspc, context_class: type | None = None, **kwarg super().__init__(name, bases, nmspc) def get_context(cls, error_if_none=True, allow_block_model_access=False) -> T | None: - """Return the most recently pushed context object of type ``cls`` - on the stack, or ``None``. If ``error_if_none`` is True (default), - raise a ``TypeError`` instead of returning ``None``.""" + """Return the most recently pushed context object of type ``cls`` on the stack, or ``None``. + + If ``error_if_none`` is True (default), raise a ``TypeError`` instead of returning ``None``. + """ try: candidate: T | None = cls.get_contexts()[-1] except IndexError: @@ -144,8 +143,7 @@ def get_context(cls, error_if_none=True, allow_block_model_access=False) -> T | return candidate def get_contexts(cls) -> list[T]: - """Return a stack of context instances for the ``context_class`` - of ``cls``.""" + """Return a stack of context instances for the ``context_class`` of ``cls``.""" # This lazily creates the context class's contexts # thread-local object, as needed. This seems inelegant to me, # but since the context class is not guaranteed to exist when @@ -206,10 +204,7 @@ def __call__(cls, *args, **kwargs): def modelcontext(model: Optional["Model"]) -> "Model": - """ - Return the given model or, if none was supplied, try to find one in - the context stack. - """ + """Return the given model or, if None was supplied, try to find one in the context stack.""" if model is None: model = Model.get_context(error_if_none=False) @@ -221,7 +216,7 @@ def modelcontext(model: Optional["Model"]) -> "Model": class ValueGradFunction: - """Create an PyTensor function that computes a value and its gradient. + """Create a PyTensor function that computes a value and its gradient. Parameters ---------- @@ -502,9 +497,11 @@ class Model(WithMemoization, metaclass=ContextMeta): if TYPE_CHECKING: - def __enter__(self: Self) -> Self: ... + def __enter__(self: Self) -> Self: + """Enter the context manager.""" - def __exit__(self, exc_type: None, exc_val: None, exc_tb: None) -> None: ... + def __exit__(self, exc_type: None, exc_val: None, exc_tb: None) -> None: + """Exit the context manager.""" def __new__(cls, *args, model: Union[Literal[UNSET], None, "Model"] = UNSET, **kwargs): # resolves the parent instance @@ -596,7 +593,7 @@ def isroot(self): return self.parent is None def logp_dlogp_function(self, grad_vars=None, tempered=False, **kwargs): - """Compile an PyTensor function that computes logp and gradient. + """Compile a PyTensor function that computes logp and gradient. 
Parameters ---------- @@ -851,30 +848,27 @@ def d2logp( @property def datalogp(self) -> Variable: - """PyTensor scalar of log-probability of the observed variables and - potential terms""" + """PyTensor scalar of log-probability of the observed variables and potential terms.""" return self.observedlogp + self.potentiallogp @property def varlogp(self) -> Variable: - """PyTensor scalar of log-probability of the unobserved random variables - (excluding deterministic).""" + """PyTensor scalar of log-probability of the unobserved random variables (excluding deterministic).""" return self.logp(vars=self.free_RVs) @property def varlogp_nojac(self) -> Variable: - """PyTensor scalar of log-probability of the unobserved random variables - (excluding deterministic) without jacobian term.""" + """PyTensor scalar of log-probability of the unobserved random variables (excluding deterministic) without jacobian term.""" return self.logp(vars=self.free_RVs, jacobian=False) @property def observedlogp(self) -> Variable: - """PyTensor scalar of log-probability of the observed variables""" + """PyTensor scalar of log-probability of the observed variables.""" return self.logp(vars=self.observed_RVs) @property def potentiallogp(self) -> Variable: - """PyTensor scalar of log-probability of the Potential terms""" + """PyTensor scalar of log-probability of the Potential terms.""" # Convert random variables in Potential expression into their log-likelihood # inputs and apply their transforms, if any potentials = self.replace_rvs_by_values(self.potentials) @@ -885,17 +879,12 @@ def potentiallogp(self) -> Variable: @property def value_vars(self): - """List of unobserved random variables used as inputs to the model's - log-likelihood (which excludes deterministics). - """ + """List of unobserved random variables used as inputs to the model's log-likelihood (which excludes deterministics).""" return [self.rvs_to_values[v] for v in self.free_RVs] @property def unobserved_value_vars(self): - """List of all random variables (including untransformed projections), - as well as deterministics used as inputs and outputs of the model's - log-likelihood graph - """ + """List of all random variables (including untransformed projections), as well as deterministics used as inputs and outputs of the model's log-likelihood graph.""" vars = [] transformed_rvs = [] for rv in self.free_RVs: @@ -915,18 +904,19 @@ def unobserved_value_vars(self): @property def discrete_value_vars(self): - """All the discrete value variables in the model""" + """All the discrete value variables in the model.""" return list(typefilter(self.value_vars, discrete_types)) @property def continuous_value_vars(self): - """All the continuous value variables in the model""" + """All the continuous value variables in the model.""" return list(typefilter(self.value_vars, continuous_types)) @property def basic_RVs(self): - """List of random variables the model is defined in terms of - (which excludes deterministics). + """List of random variables the model is defined in terms of. + + This excludes deterministics. These are the actual random variable terms that make up the "sample-space" graph (i.e. you can sample these graphs by compiling them @@ -982,7 +972,7 @@ def add_coord( *, length: int | Variable | None = None, ): - """Registers a dimension coordinate with the model. + """Register a dimension coordinate with the model. 
Parameters ---------- @@ -1083,7 +1073,7 @@ def set_dim(self, name: str, new_length: int, coord_values: Sequence | None = No return def initial_point(self, random_seed: SeedSequenceSeed = None) -> dict[str, np.ndarray]: - """Computes the initial point of the model. + """Compute the initial point of the model. Parameters ---------- @@ -1099,7 +1089,7 @@ def initial_point(self, random_seed: SeedSequenceSeed = None) -> dict[str, np.nd return Point(fn(random_seed), model=self) def set_initval(self, rv_var, initval): - """Sets an initial value (strategy) for a random variable.""" + """Set an initial value (strategy) for a random variable.""" if initval is not None and not isinstance(initval, Variable | str): # Convert scalars or array-like inputs to ndarrays initval = rv_var.type.filter(initval) @@ -1112,7 +1102,7 @@ def set_data( values: Sequence | np.ndarray, coords: dict[str, Sequence] | None = None, ): - """Changes the values of a data variable in the model. + """Change the values of a data variable in the model. In contrast to pm.Data().set_value, this method can also update the corresponding coordinates. @@ -1419,8 +1409,7 @@ def create_value_var( transform: Transform, value_var: Variable | None = None, ) -> TensorVariable: - """Create a ``TensorVariable`` that will be used as the random - variable's "value" in log-likelihood graphs. + """Create a ``TensorVariable`` that will be used as the random variable's "value" in log-likelihood graphs. In general, we'll call this type of variable the "value" variable. @@ -1545,7 +1534,7 @@ def prefix(self) -> str: return name def name_for(self, name): - """Checks if name has prefix and adds if needed""" + """Check if name has prefix and add it if needed.""" name = self._validate_name(name) if self.prefix: if not name.startswith(self.prefix + "::"): @@ -1556,7 +1545,7 @@ def name_for(self, name): return name def name_of(self, name): - """Checks if name has prefix and deletes if needed""" + """Check if name has prefix and delete it if needed.""" name = self._validate_name(name) if not self.prefix or not name: return name @@ -1566,6 +1555,7 @@ def name_of(self, name): return name def __getitem__(self, key): + """Get the variable named `key`.""" try: return self.named_vars[key] except KeyError as e: @@ -1575,17 +1565,20 @@ def __getitem__(self, key): raise e def __contains__(self, key): + """Check if the model contains a variable named `key`.""" return key in self.named_vars or self.name_for(key) in self.named_vars def __copy__(self): + """Clone the model.""" return self.copy() def __deepcopy__(self, _): + """Clone the model.""" return self.copy() def copy(self): """ - Clone the model + Clone the model. To access variables in the cloned model use `cloned_model["var_name"]`. @@ -1667,7 +1660,7 @@ def compile_fn( point_fn: bool = True, **kwargs, ) -> PointFunc | Function: - """Compiles an PyTensor function + """Compiles a PyTensor function. Parameters ---------- @@ -1705,8 +1698,7 @@ def compile_fn( return fn def profile(self, outs, *, n=1000, point=None, profile=True, **kwargs): - """Compiles and profiles an PyTensor function which returns ``outs`` and - takes values of model vars as a dict as an argument. + """Compile and profile a PyTensor function which returns ``outs`` and takes values of model vars as a dict as an argument.
Parameters ---------- @@ -1752,7 +1744,7 @@ def update_start_vals(self, a: dict[str, np.ndarray], b: dict[str, np.ndarray]): ) def eval_rv_shapes(self) -> dict[str, tuple[int, ...]]: - """Evaluates shapes of untransformed AND transformed free variables. + """Evaluate shapes of untransformed AND transformed free variables. Returns ------- @@ -1778,8 +1770,7 @@ def eval_rv_shapes(self) -> dict[str, tuple[int, ...]]: return {name: tuple(shape) for name, shape in zip(names, f())} def check_start_vals(self, start, **kwargs): - r"""Check that the starting values for MCMC do not cause the relevant log probability - to evaluate to something invalid (e.g. Inf or NaN) + r"""Check that the logp is defined and finite at the starting point. Parameters ---------- @@ -1830,7 +1821,7 @@ def check_start_vals(self, start, **kwargs): ) def point_logps(self, point=None, round_vals=2, **kwargs): - """Computes the log probability of `point` for all random variables in the model. + """Compute the log probability of `point` for all random variables in the model. Parameters ---------- @@ -2089,7 +2080,7 @@ def to_graphviz( class BlockModelAccess(Model): - """Can be used to prevent user access to Model contexts""" + """Can be used to prevent user access to Model contexts.""" def __init__(self, *args, error_msg_on_access="Model access is blocked", **kwargs): self.error_msg_on_access = error_msg_on_access @@ -2104,9 +2095,11 @@ def new_or_existing_block_model_access(*args, **kwargs): def set_data(new_data, model=None, *, coords=None): - """Sets the value of one or more data container variables. Note that the shape is also - dynamic, it is updated when the value is changed. See the examples below for two common - use-cases that take advantage of this behavior. + """Set the value of one or more data container variables. + + Note that the shape is also dynamic, it is updated when the value is + changed. See the examples below for two common use-cases that take + advantage of this behavior. Parameters ---------- @@ -2184,7 +2177,7 @@ def compile_fn( model: Model | None = None, **kwargs, ) -> PointFunc | Function: - """Compiles an PyTensor function + """Compiles a PyTensor function. Parameters ---------- @@ -2204,7 +2197,6 @@ def compile_fn( ------- Compiled PyTensor function """ - model = modelcontext(model) return model.compile_fn( outs, @@ -2216,7 +2208,9 @@ def compile_fn( def Point(*args, filter_model_vars=False, **kwargs) -> dict[VarName, np.ndarray]: - """Build a point. Uses same args as dict() does. + """Build a point. + + Uses same args as dict() does. Filters out variables not in the model. All keys are strings. Parameters diff --git a/pymc/model/fgraph.py b/pymc/model/fgraph.py index 8c37861c8..78ad61306 100644 --- a/pymc/model/fgraph.py +++ b/pymc/model/fgraph.py @@ -30,9 +30,7 @@ class ModelVar(Op): - """A dummy Op that describes the purpose of a Model variable and contains - meta-information as additional inputs (value and dims). - """ + """A dummy Op that describes the purpose of a Model variable and contains meta-information as additional inputs (value and dims).""" def make_node(self, rv, *dims): assert isinstance(rv, Variable) @@ -151,7 +149,6 @@ def fgraph_from_model( memo: Dict A dictionary mapping original model variables to the equivalent nodes in the fgraph. 
""" - if any(v is not None for v in model.rvs_to_initial_values.values()): raise NotImplementedError("Cannot convert models with non-default initial_values") diff --git a/pymc/model/transform/__init__.py b/pymc/model/transform/__init__.py index ae0da7db2..008e6f8ff 100644 --- a/pymc/model/transform/__init__.py +++ b/pymc/model/transform/__init__.py @@ -11,3 +11,5 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + +"""Model transforms.""" diff --git a/pymc/model/transform/basic.py b/pymc/model/transform/basic.py index 76556ae08..3d756785a 100644 --- a/pymc/model/transform/basic.py +++ b/pymc/model/transform/basic.py @@ -29,7 +29,6 @@ def prune_vars_detached_from_observed(model: Model) -> Model: """Prune model variables that are not related to any observed variable in the Model.""" - # Potentials are ambiguous as whether they correspond to likelihood or prior terms, # We simply raise for now if model.potentials: diff --git a/pymc/model/transform/conditioning.py b/pymc/model/transform/conditioning.py index 9bcdce7c4..23e017550 100644 --- a/pymc/model/transform/conditioning.py +++ b/pymc/model/transform/conditioning.py @@ -225,7 +225,7 @@ def change_value_transforms( model: Model, vars_to_transforms: Mapping[ModelVariable, Transform | None], ) -> Model: - """Change the value variables transforms in the model + r"""Change the value variables transforms in the model. Parameters ---------- @@ -309,7 +309,7 @@ def remove_value_transforms( model: Model, vars: Sequence[ModelVariable] | None = None, ) -> Model: - """Remove the value variables transforms in the model + r"""Remove the value variables transforms in the model. Parameters ---------- diff --git a/pymc/model_graph.py b/pymc/model_graph.py index 659647726..08589a828 100644 --- a/pymc/model_graph.py +++ b/pymc/model_graph.py @@ -117,7 +117,7 @@ def __eq__(self, other) -> bool: def default_potential(var: TensorVariable) -> GraphvizNodeKwargs: - """Default data for potential in the graph.""" + """Return default data for potential in the graph.""" return { "shape": "octagon", "style": "filled", @@ -136,7 +136,7 @@ def random_variable_symbol(var: TensorVariable) -> str: def default_free_rv(var: TensorVariable) -> GraphvizNodeKwargs: - """Default data for free RV in the graph.""" + """Return default data for free RV in the graph.""" symbol = random_variable_symbol(var) return { @@ -147,7 +147,7 @@ def default_free_rv(var: TensorVariable) -> GraphvizNodeKwargs: def default_observed_rv(var: TensorVariable) -> GraphvizNodeKwargs: - """Default data for observed RV in the graph.""" + """Return default data for observed RV in the graph.""" symbol = random_variable_symbol(var) return { @@ -158,7 +158,7 @@ def default_observed_rv(var: TensorVariable) -> GraphvizNodeKwargs: def default_deterministic(var: TensorVariable) -> GraphvizNodeKwargs: - """Default data for the deterministic in the graph.""" + """Return default data for the deterministic in the graph.""" return { "shape": "box", "style": None, @@ -167,7 +167,7 @@ def default_deterministic(var: TensorVariable) -> GraphvizNodeKwargs: def default_data(var: TensorVariable) -> GraphvizNodeKwargs: - """Default data for the data in the graph.""" + """Return default data for the data in the graph.""" return { "shape": "box", "style": "rounded, filled", @@ -226,7 +226,7 @@ def _make_node( cluster: str | None = None, formatting: str = "plain", ): - """Attaches the given variable 
to a graphviz or networkx Digraph""" + """Attaches the given variable to a graphviz or networkx Digraph.""" node_formatter = node_formatters[node.node_type] kwargs = node_formatter(node.var) @@ -311,7 +311,7 @@ def vars_to_plot(self, var_names: Iterable[VarName] | None = None) -> list[VarNa def make_compute_graph( self, var_names: Iterable[VarName] | None = None ) -> dict[VarName, set[VarName]]: - """Get map of var_name -> set(input var names) for the model""" + """Get map of var_name -> set(input var names) for the model.""" input_map: dict[VarName, set[VarName]] = defaultdict(set) for var_name in self.vars_to_plot(var_names): @@ -441,7 +441,7 @@ def make_graph( node_formatters: NodeTypeFormatterMapping | None = None, create_plate_label: PlateLabelFunc = create_plate_label_with_dim_length, ): - """Make graphviz Digraph of PyMC model + """Make graphviz Digraph of PyMC model. Returns ------- @@ -511,7 +511,7 @@ def make_networkx( node_formatters: NodeTypeFormatterMapping | None = None, create_plate_label: PlateLabelFunc = create_plate_label_with_dim_length, ): - """Make networkx Digraph of PyMC model + """Make networkx Digraph of PyMC model. Returns ------- diff --git a/pymc/ode/ode.py b/pymc/ode/ode.py index 7ecb4deba..ca01af13b 100644 --- a/pymc/ode/ode.py +++ b/pymc/ode/ode.py @@ -32,7 +32,7 @@ class DifferentialEquation(Op): r""" - Specify an ordinary differential equation + Specify an ordinary differential equation. Due to the nature of the model (as well as included solvers), the process of ODE solution may perform slowly. A faster alternative library based on PyMC--sunode--has implemented Adams' method and BDF (backward differentation formula). More information about sunode is available at: https://github.com/aseyboldt/sunode. @@ -108,7 +108,9 @@ def __init__(self, func, times, *, n_states, n_theta, t0=0): self._output_sensitivities = {} def _system(self, Y, t, p): - r"""The function that will be passed to odeint. Solves both ODE and sensitivities. + r"""Solve both ODE and sensitivities. + + This function will be passed to odeint. Parameters ---------- diff --git a/pymc/ode/utils.py b/pymc/ode/utils.py index 1ccf7e5ba..3ad05b1e1 100644 --- a/pymc/ode/utils.py +++ b/pymc/ode/utils.py @@ -19,7 +19,9 @@ def make_sens_ic(n_states, n_theta, floatX): r""" - The sensitivity matrix will always have consistent form. (n_states, n_states + n_theta) + Make initial condition for the sensitivity matrix. + + The sensitivity matrix will always have consistent form. (n_states, n_states + n_theta). If the first n_states entries of the parameters vector in the simulate call correspond to initial conditions of the system, @@ -44,7 +46,6 @@ def make_sens_ic(n_states, n_theta, floatX): dydp : array 1D-array of shape (n_states * (n_states + n_theta),), representing the initial condition of the sensitivities """ - # Initialize the sensitivity matrix to be 0 everywhere sens_matrix = np.zeros((n_states, n_states + n_theta), dtype=floatX) @@ -59,7 +60,7 @@ def make_sens_ic(n_states, n_theta, floatX): def augment_system(ode_func, n_states, n_theta): """ - Function to create augmented system. + Create augmented system. Take a function which specifies a set of differential equations and return a compiled function which allows for computation of gradients of the @@ -81,7 +82,6 @@ def augment_system(ode_func, n_states, n_theta): system: function Augemted system of differential equations. 
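The `DifferentialEquation` signature shown above can be exercised with a toy decay model; a rough sketch (the ODE, priors, and data are made up for illustration):

    import numpy as np
    import pymc as pm
    from pymc.ode import DifferentialEquation

    def decay(y, t, p):
        # dy/dt = -p[0] * y
        return [-p[0] * y[0]]

    times = np.linspace(0, 10, 20)
    ode = DifferentialEquation(func=decay, times=times, n_states=1, n_theta=1, t0=0)

    with pm.Model():
        lam = pm.HalfNormal("lam", 1.0)
        y0 = pm.LogNormal("y0", 0.0, 1.0)
        sol = ode(y0=[y0], theta=[lam])              # shape (n_times, n_states)
        pm.Normal("y_obs", mu=sol, sigma=0.1, observed=np.exp(-0.5 * times)[:, None])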
""" - # Present state of the system t_y = pt.vector("y", dtype="float64") t_y.tag.test_value = np.ones((n_states,), dtype="float64") diff --git a/pymc/printing.py b/pymc/printing.py index ef417f379..946a8a213 100644 --- a/pymc/printing.py +++ b/pymc/printing.py @@ -37,9 +37,11 @@ def str_for_dist( dist: TensorVariable, formatting: str = "plain", include_params: bool = True ) -> str: - """Make a human-readable string representation of a Distribution in a model, either - LaTeX or plain, optionally with distribution parameter values included.""" + """Make a human-readable string representation of a Distribution in a model. + This can be either LaTeX or plain, optionally with distribution parameter + values included. + """ if include_params: if isinstance(dist.owner.op, RandomVariable) or getattr( dist.owner.op, "extended_signature", None @@ -98,9 +100,11 @@ def str_for_dist( def str_for_model(model: Model, formatting: str = "plain", include_params: bool = True) -> str: - """Make a human-readable string representation of Model, listing all random variables - and their distributions, optionally including parameter values.""" + """Make a human-readable string representation of Model. + This lists all random variables and their distributions, optionally + including parameter values. + """ # Wrap functions to avoid confusing typecheckers sfd = partial(str_for_dist, formatting=formatting, include_params=include_params) sfp = partial( @@ -146,8 +150,11 @@ def str_for_potential_or_deterministic( include_params: bool = True, dist_name: str = "Deterministic", ) -> str: - """Make a human-readable string representation of a Deterministic or Potential in a model, either - LaTeX or plain, optionally with distribution parameter values included.""" + """Make a human-readable string representation of a Deterministic or Potential in a model. + + This can be either LaTeX or plain, optionally with distribution parameter + values included. + """ print_name = var.name if var.name is not None else "" if "latex" in formatting: print_name = r"\text{" + _latex_escape(print_name.strip("$")) + "}" @@ -302,7 +309,5 @@ def _default_repr_pretty(obj: TensorVariable | Model, p, cycle): def _format_underscore(variable: str) -> str: - """ - Escapes all unescaped underscores in the variable name for LaTeX representation. - """ + """Escapes all unescaped underscores in the variable name for LaTeX representation.""" return re.sub(r"(? tuple[list[TensorVariable], TensorVariable]: """ - Create new outputs and input TensorVariables where the non-shared inputs are joined - in a single raveled vector input. + Create new outputs and input TensorVariables where the non-shared inputs are joined in a single raveled vector input. Parameters ---------- @@ -640,15 +633,13 @@ def __call__(self, state): class CallableTensor: - """Turns a symbolic variable with one input into a function that returns symbolic arguments - with the one variable replaced with the input. - """ + """Turns a symbolic variable with one input into a function that returns symbolic arguments with the one variable replaced with the input.""" def __init__(self, tensor): self.tensor = tensor def __call__(self, input): - """Replaces the single input of symbolic variable to be the passed argument. + """Replace the single input of symbolic variable to be the passed argument. Parameters ---------- @@ -729,7 +720,8 @@ def set_default(self, value): def generator(gen, default=None): """ - Generator variable with possibility to set default value and new generator. 
+ Create a generator variable with possibility to set default value and new generator. + If generator is exhausted variable will produce default value if it is not None, else raises `StopIteration` exception that can be caught on runtime. @@ -751,7 +743,7 @@ def generator(gen, default=None): def ix_(*args): """ - PyTensor np.ix_ analog + PyTensor np.ix_ analog. See numpy.lib.index_tricks.ix_ for reference """ @@ -778,14 +770,14 @@ def largest_common_dtype(tensors): def find_rng_nodes( variables: Iterable[Variable], ) -> list[RandomGeneratorSharedVariable]: - """Return shared RNG variables in a graph""" + """Return shared RNG variables in a graph.""" return [ node for node in graph_inputs(variables) if isinstance(node, RandomGeneratorSharedVariable) ] def replace_rng_nodes(outputs: Sequence[TensorVariable]) -> list[TensorVariable]: - """Replace any RNG nodes upstream of outputs by new RNGs of the same type + """Replace any RNG nodes upstream of outputs by new RNGs of the same type. This can be used when combining a pre-existing graph with a cloned one, to ensure RNGs are unique across the two graphs. @@ -809,7 +801,7 @@ def reseed_rngs( rngs: Sequence[SharedVariable], seed: SeedSequenceSeed, ) -> None: - """Create a new set of RandomState/Generator for each rng based on a seed""" + """Create a new set of RandomState/Generator for each rng based on a seed.""" bit_generators = [ np.random.PCG64(sub_seed) for sub_seed in np.random.SeedSequence(seed).spawn(len(rngs)) ] @@ -1092,9 +1084,7 @@ def constant_fold( def rewrite_pregrad(graph): - """Apply simplifying or stabilizing rewrites to graph that are safe to use - pre-grad. - """ + """Apply simplifying or stabilizing rewrites to graph that are safe to use pre-grad.""" return rewrite_graph(graph, include=("canonicalize", "stabilize")) @@ -1142,7 +1132,7 @@ def toposort_replace( def normalize_rng_param(rng: None | Variable) -> Variable: - """Validate rng is a valid type or create a new one if None""" + """Validate rng is a valid type or create a new one if None.""" if rng is None: rng = pytensor.shared(np.random.default_rng()) elif not isinstance(rng.type, RandomType): diff --git a/pymc/sampling/__init__.py b/pymc/sampling/__init__.py index 547250cd5..bb5206ecc 100644 --- a/pymc/sampling/__init__.py +++ b/pymc/sampling/__init__.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +"""MCMC samplers.""" + from pymc.sampling.deterministic import compute_deterministics from pymc.sampling.forward import * from pymc.sampling.mcmc import * diff --git a/pymc/sampling/forward.py b/pymc/sampling/forward.py index c1504091e..ce00ab74d 100644 --- a/pymc/sampling/forward.py +++ b/pymc/sampling/forward.py @@ -75,7 +75,7 @@ def get_constant_coords(trace_coords: dict[str, np.ndarray], model: Model) -> set: - """Get the set of coords that have remained constant between the trace and model""" + """Get the set of coords that have remained constant between the trace and model.""" constant_coords = set() for dim, coord in trace_coords.items(): current_coord = model.coords.get(dim, None) @@ -284,7 +284,7 @@ def draw( random_seed: RandomState = None, **kwargs, ) -> np.ndarray | list[np.ndarray]: - """Draw samples for one variable or a list of variables + """Draw samples for one variable or a list of variables. 
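For the `draw` helper documented above, a minimal sketch:

    import pymc as pm

    x = pm.Normal.dist(mu=0.0, sigma=1.0)
    samples = pm.draw(x, draws=100, random_seed=42)
    print(samples.shape)  # (100,)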
Parameters ---------- @@ -346,7 +346,7 @@ def draw( def observed_dependent_deterministics(model: Model): - """Find deterministics that depend directly on observed variables""" + """Find deterministics that depend directly on observed variables.""" deterministics = model.deterministics observed_rvs = set(model.observed_RVs) blockers = model.basic_RVs @@ -754,7 +754,6 @@ def sample_posterior_predictive( """ - _trace: MultiTrace | PointList nchain: int if idata_kwargs is None: diff --git a/pymc/sampling/jax.py b/pymc/sampling/jax.py index c530af8d9..43e1baa87 100644 --- a/pymc/sampling/jax.py +++ b/pymc/sampling/jax.py @@ -92,14 +92,13 @@ def posdefmatrix_fn(value, *inps): def _replace_shared_variables(graph: list[TensorVariable]) -> list[TensorVariable]: - """Replace shared variables in graph by their constant values + """Replace shared variables in graph by their constant values. Raises ------ ValueError If any shared variable contains default_updates """ - shared_variables = [var for var in graph_inputs(graph) if isinstance(var, SharedVariable)] if any(isinstance(var.type, RandomType) for var in shared_variables): @@ -123,8 +122,7 @@ def get_jaxified_graph( inputs: list[TensorVariable] | None = None, outputs: list[TensorVariable] | None = None, ) -> list[TensorVariable]: - """Compile an PyTensor graph into an optimized JAX function""" - + """Compile a PyTensor graph into an optimized JAX function.""" graph = _replace_shared_variables(outputs) if outputs is not None else None fgraph = FunctionGraph(inputs=inputs, outputs=graph, clone=True) @@ -164,7 +162,7 @@ def _get_log_likelihood( backend: Literal["cpu", "gpu"] | None = None, postprocessing_vectorize: Literal["vmap", "scan"] = "scan", ) -> dict: - """Compute log-likelihood for all observations""" + """Compute log-likelihood for all observations.""" elemwise_logp = model.logp(model.observed_RVs, sum=False) jax_fn = get_jaxified_graph(inputs=model.value_vars, outputs=elemwise_logp) result = _postprocess_samples( @@ -216,7 +214,7 @@ def _get_batched_jittered_initial_points( jitter: bool = True, jitter_max_retries: int = 10, ) -> np.ndarray | list[np.ndarray]: - """Get jittered initial point in format expected by NumPyro MCMC kernel + """Get jittered initial point in format expected by NumPyro MCMC kernel. Returns ------- @@ -224,7 +222,6 @@ def _get_batched_jittered_initial_points( list with one item per variable and number of chains as batch dimension. Each item has shape `(chains, *var.shape)` """ - initial_points = _init_jitter( model, initvals, @@ -354,7 +351,6 @@ def _sample_blackjax_nuts( with their respective sample stats and pointwise log likeihood values (unless skipped with ``idata_kwargs``). """ - import blackjax # Adapted from numpyro diff --git a/pymc/sampling/mcmc.py b/pymc/sampling/mcmc.py index 228850e63..1c124eb7a 100644 --- a/pymc/sampling/mcmc.py +++ b/pymc/sampling/mcmc.py @@ -254,7 +254,7 @@ def _print_step_hierarchy(s: Step, level: int = 0) -> None: def all_continuous(vars): - """Check that vars not include discrete variables""" + """Check that vars not include discrete variables.""" if any((var.dtype in discrete_types) for var in vars): return False else: @@ -908,8 +908,10 @@ def _sample_return( idata_kwargs: dict[str, Any], model: Model, ) -> InferenceData | MultiTrace: - """Final step of `pm.sampler` that picks/slices chains, - runs diagnostics and converts to the desired return type.""" + """Pick/slice chains, run diagnostics and convert to the desired return type. + + Final step of `pm.sampler`. 
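The forward-sampling and return-type hunks above fit together roughly as follows (a sketch; the model and sample sizes are illustrative):

    import pymc as pm

    with pm.Model() as m:
        mu = pm.Normal("mu", 0.0, 1.0)
        pm.Normal("obs", mu=mu, sigma=1.0, observed=[0.2, -0.1, 0.4])
        idata = pm.sample(draws=500, tune=500, chains=2, random_seed=1)
        # add a posterior_predictive group to the returned InferenceData
        idata.extend(pm.sample_posterior_predictive(idata))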
+ """ # Pick and slice chains to keep the maximum number of samples if discard_tuned_samples: traces, length = _choose_chains(traces, tune) @@ -966,7 +968,7 @@ def _sample_return( def _check_start_shape(model, start: PointType): - """Checks that the prior evaluations and initial points have identical shapes. + """Check that the prior evaluations and initial points have identical shapes. Parameters ---------- @@ -1001,7 +1003,7 @@ def _sample_many( callback: SamplingIteratorCallback | None = None, **kwargs, ): - """Samples all chains sequentially. + """Sample all chains sequentially. Parameters ---------- @@ -1045,7 +1047,7 @@ def _sample( callback=None, **kwargs, ) -> None: - """Main iteration for singleprocess sampling. + """Sample one chain (singleprocess). Multiple step methods are supported via compound step methods. @@ -1125,7 +1127,7 @@ def _iter_sample( model: Model | None = None, callback: SamplingIteratorCallback | None = None, ) -> Iterator[bool]: - """Generator for sampling one chain. (Used in singleprocess sampling.) + """Sample one chain with a generator (singleprocess). Parameters ---------- @@ -1210,7 +1212,7 @@ def _mp_sample( mp_ctx=None, **kwargs, ) -> None: - """Main iteration for multiprocess sampling. + """Sample all chains (multiprocess). Parameters ---------- @@ -1312,7 +1314,6 @@ def _init_jitter( start : ``pymc.model.Point`` Starting point for sampler """ - ipfns = make_initial_point_fns_per_chain( model=model, overrides=initvals, diff --git a/pymc/sampling/parallel.py b/pymc/sampling/parallel.py index 4b76e53a9..6e19c4aeb 100644 --- a/pymc/sampling/parallel.py +++ b/pymc/sampling/parallel.py @@ -50,6 +50,7 @@ def __init__(self, tb): self.tb = tb def __str__(self): + """Return a string representation of the object.""" return self.tb @@ -61,6 +62,7 @@ def __init__(self, exc, tb): self.tb = f'\n"""\n{tb}"""' def __reduce__(self): + """Return a tuple to pickle.""" return rebuild_exc, (self.exc, self.tb) @@ -80,6 +82,7 @@ def rebuild_exc(exc, tb): class _Process: """Separate process for each chain. + We communicate with the main process using a pipe, and send finished samples using shared memory. """ @@ -275,9 +278,7 @@ def __init__( @property def shared_point_view(self): - """May only be written to or read between a `recv_draw` - call from the process and a `write_next` or `abort` call. - """ + """May only be written to or read between a `recv_draw` call from the process and a `write_next` or `abort` call.""" if not self._readable: raise RuntimeError() return self._point @@ -460,6 +461,7 @@ def _make_active(self): self._active.append(proc) def __iter__(self): + """Return an iterator over draws.""" if not self._in_context: raise ValueError("Use ParallelSampler as context manager.") self._make_active() @@ -504,10 +506,12 @@ def __iter__(self): yield Draw(proc.chain, is_last, draw, tuning, stats, point) def __enter__(self): + """Enter the context manager.""" self._in_context = True return self def __exit__(self, *args): + """Exit the context manager.""" ProcessAdapter.terminate_all(self._samplers) diff --git a/pymc/sampling/population.py b/pymc/sampling/population.py index c0dc813b5..4e5a22996 100644 --- a/pymc/sampling/population.py +++ b/pymc/sampling/population.py @@ -62,7 +62,7 @@ def _sample_population( traces: Sequence[BaseTrace], **kwargs, ): - """Performs sampling of a population of chains using the ``PopulationStepper``. + """Perform sampling of a population of chains using the ``PopulationStepper``. 
Parameters ---------- @@ -234,7 +234,7 @@ def __exit__(self, exc_type, exc_val, exc_tb): @staticmethod def _run_secondary(c, stepper_dumps, secondary_end, task, progress): - """The method is started on a separate process to perform stepping of a chain. + """Perform stepping of a chain from a separate process. Parameters ---------- diff --git a/pymc/smc/__init__.py b/pymc/smc/__init__.py index 4608b39ce..4d6f90eab 100644 --- a/pymc/smc/__init__.py +++ b/pymc/smc/__init__.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +"""Sequential Monte Carlo samplers.""" + from pymc.smc.kernels import IMH, MH from pymc.smc.sampling import sample_smc diff --git a/pymc/smc/kernels.py b/pymc/smc/kernels.py index 2bfde7583..608454ef3 100644 --- a/pymc/smc/kernels.py +++ b/pymc/smc/kernels.py @@ -161,7 +161,6 @@ def __init__( Dictionary that contains information about model variables shape and size. """ - self.draws = draws self.start = start if threshold < 0 or threshold > 1: @@ -187,7 +186,7 @@ def __init__( self.weights = np.ones(self.draws) / self.draws def initialize_population(self) -> dict[str, np.ndarray]: - """Create an initial population from the prior distribution""" + """Create an initial population from the prior distribution.""" sys.stdout.write(" ") # see issue #5828 with warnings.catch_warnings(): warnings.filterwarnings( @@ -213,7 +212,7 @@ def initialize_population(self) -> dict[str, np.ndarray]: return cast(dict[str, np.ndarray], dict_prior) def _initialize_kernel(self): - """Create variables and logp function necessary to run SMC kernel + """Create variables and logp function necessary to run SMC kernel. This method should not be overwritten. If needed, use `setup_kernel` instead. @@ -253,11 +252,11 @@ def _initialize_kernel(self): self.likelihood_logp = np.array(likelihoods).squeeze() def setup_kernel(self): - """Setup logic performed once before sampling starts""" + """Perform setup logic once before sampling starts.""" pass def update_beta_and_weights(self): - """Calculate the next inverse temperature (beta) + """Calculate the next inverse temperature (beta). The importance weights based on two successive tempered likelihoods (i.e. two successive values of beta) and updates the marginal likelihood estimate. @@ -294,7 +293,7 @@ def update_beta_and_weights(self): self.log_marginal_likelihood += logsumexp(log_weights_un) - np.log(self.draws) def resample(self): - """Resample particles based on importance weights""" + """Resample particles based on importance weights.""" self.resampling_indexes = systematic_resampling(self.weights, self.rng) self.tempered_posterior = self.tempered_posterior[self.resampling_indexes] @@ -304,16 +303,16 @@ def resample(self): self.tempered_posterior_logp = self.prior_logp + self.likelihood_logp * self.beta def tune(self): - """Tuning logic performed before every mutation step""" + """Tuning logic performed before every mutation step.""" pass @abc.abstractmethod def mutate(self): - """Apply kernel-specific perturbation to the particles once per stage""" + """Apply kernel-specific perturbation to the particles once per stage.""" pass def sample_stats(self) -> SMCStats: - """Stats to be saved at the end of each stage + """Stats to be saved at the end of each stage. These stats will be saved under `sample_stats` in the final InferenceData object. 
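The SMC kernel hooks above (`tune`, `mutate`, `sample_stats`) are driven by `sample_smc`; a minimal sketch:

    import pymc as pm

    with pm.Model() as m:
        x = pm.Normal("x", 0.0, 1.0)
        pm.Normal("obs", mu=x, sigma=1.0, observed=[0.1, 0.3, -0.2])
        idata = pm.sample_smc(draws=1000, chains=2, random_seed=1)

    print(idata.sample_stats)  # stage statistics collected via SMC_KERNEL.sample_stats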
""" @@ -334,7 +333,7 @@ def sample_settings(self) -> SMCSettings: } def _posterior_to_trace(self, chain=0) -> NDArray: - """Save results into a PyMC trace + """Save results into a PyMC trace. This method should not be overwritten. """ @@ -361,10 +360,12 @@ def _posterior_to_trace(self, chain=0) -> NDArray: class IMH(SMC_KERNEL): - """Independent Metropolis-Hastings SMC_kernel""" + """Independent Metropolis-Hastings SMC_kernel.""" def __init__(self, *args, correlation_threshold=0.01, **kwargs): """ + Create the Independent Metropolis-Hastings SMC kernel object. + Parameters ---------- correlation_threshold : float, default 0.01 @@ -467,10 +468,12 @@ def get(self, b): class MH(SMC_KERNEL): - """Metropolis-Hastings SMC_kernel""" + """Metropolis-Hastings SMC_kernel.""" def __init__(self, *args, correlation_threshold=0.01, **kwargs): """ + Create a Metropolis-Hastings SMC kernel. + Parameters ---------- correlation_threshold : float, default 0.01 @@ -490,7 +493,8 @@ def __init__(self, *args, correlation_threshold=0.01, **kwargs): def setup_kernel(self): """Proposal dist is just a Multivariate Normal with unit identity covariance. - Dimension specific scaling is provided by `self.proposal_scales` and set in `self.tune()` + + Dimension specific scaling is provided by `self.proposal_scales` and set in `self.tune()`. """ ndim = self.tempered_posterior.shape[1] self.proposal_scales = np.full(self.draws, min(1, 2.38**2 / ndim)) @@ -502,7 +506,7 @@ def resample(self): self.chain_acc_rate = self.chain_acc_rate[self.resampling_indexes] def tune(self): - """Update proposal scales for each particle dimension and update number of MH steps""" + """Update proposal scales for each particle dimension and update number of MH steps.""" if self.iteration > 1: # Rescale based on distance to 0.234 acceptance rate chain_scales = np.exp(np.log(self.proposal_scales) + (self.chain_acc_rate - 0.234)) @@ -614,7 +618,6 @@ def _logp_forw(point, out_vars, in_vars, shared): shared : list Containing TensorVariable for depended shared data """ - # Replace integer inputs with rounded float inputs if any(var.dtype in discrete_types for var in in_vars): replace_int_input = {} diff --git a/pymc/smc/sampling.py b/pymc/smc/sampling.py index 4cd3c1177..155d53164 100644 --- a/pymc/smc/sampling.py +++ b/pymc/smc/sampling.py @@ -150,7 +150,6 @@ def sample_smc( `link `__ """ - if isinstance(kernel, str) and kernel.lower() in ("abc", "metropolis"): warnings.warn( f'The kernel string argument "{kernel}" in sample_smc has been deprecated. 
' diff --git a/pymc/stats/convergence.py b/pymc/stats/convergence.py index 47359365e..eee667782 100644 --- a/pymc/stats/convergence.py +++ b/pymc/stats/convergence.py @@ -131,7 +131,7 @@ def run_convergence_checks(idata: arviz.InferenceData, model) -> list[SamplerWar def warn_divergences(idata: arviz.InferenceData) -> list[SamplerWarning]: - """Checks sampler stats and creates a list of warnings about divergences.""" + """Check sampler stats and creates a list of warnings about divergences.""" sampler_stats = idata.get("sample_stats", None) if sampler_stats is None: return [] @@ -153,7 +153,7 @@ def warn_divergences(idata: arviz.InferenceData) -> list[SamplerWarning]: def warn_treedepth(idata: arviz.InferenceData) -> list[SamplerWarning]: - """Checks sampler stats and creates a list of warnings about tree depth.""" + """Check sampler stats and creates a list of warnings about tree depth.""" sampler_stats = idata.get("sample_stats", None) if sampler_stats is None: return [] @@ -187,7 +187,7 @@ def log_warnings(warnings: Sequence[SamplerWarning]): def log_warning_stats(stats: Sequence[dict[str, Any]]): - """Logs 'warning' stats if present.""" + """Log 'warning' stats if present.""" if stats is None: return diff --git a/pymc/stats/log_density.py b/pymc/stats/log_density.py index 3216e26f3..266ceaac1 100644 --- a/pymc/stats/log_density.py +++ b/pymc/stats/log_density.py @@ -38,7 +38,7 @@ def compute_log_likelihood( progressbar=True, compile_kwargs: dict[str, Any] | None = None, ): - """Compute elemwise log_likelihood of model given InferenceData with posterior group + """Compute elemwise log_likelihood of model given InferenceData with posterior group. Parameters ---------- @@ -81,7 +81,7 @@ def compute_log_prior( progressbar=True, compile_kwargs=None, ): - """Compute elemwise log_prior of model given InferenceData with posterior group + """Compute elemwise log_prior of model given InferenceData with posterior group. Parameters ---------- @@ -127,7 +127,7 @@ def compute_log_density( compile_kwargs=None, ) -> InferenceData | Dataset: """ - Compute elemwise log_likelihood or log_prior of model given InferenceData with posterior group + Compute elemwise log_likelihood or log_prior of model given InferenceData with posterior group. Parameters ---------- @@ -154,7 +154,6 @@ def compute_log_density( InferenceData with the ``log_likelihood`` group when ``kind == "likelihood"`` or the ``log_prior`` group when ``kind == "prior"``. """ - posterior = idata["posterior"] model = modelcontext(model) diff --git a/pymc/step_methods/__init__.py b/pymc/step_methods/__init__.py index 5f44acc72..47fabc10d 100644 --- a/pymc/step_methods/__init__.py +++ b/pymc/step_methods/__init__.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +"""Step methods.""" + from pymc.step_methods.compound import BlockedStep, CompoundStep from pymc.step_methods.hmc import NUTS, HamiltonianMC from pymc.step_methods.metropolis import ( diff --git a/pymc/step_methods/arraystep.py b/pymc/step_methods/arraystep.py index bddf02f15..b7da80aee 100644 --- a/pymc/step_methods/arraystep.py +++ b/pymc/step_methods/arraystep.py @@ -75,8 +75,10 @@ def astep(self, apoint: RaveledVars, *args) -> tuple[RaveledVars, StatsType]: class ArrayStepShared(BlockedStep): - """Faster version of ArrayStep that requires the substep method that does not wrap - the functions the step method uses. + """Faster version of ArrayStep. 
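The `compute_log_likelihood` / `compute_log_prior` helpers renamed above can be applied to an existing trace; a sketch:

    import pymc as pm

    with pm.Model() as m:
        mu = pm.Normal("mu", 0.0, 1.0)
        pm.Normal("obs", mu=mu, sigma=1.0, observed=[0.0, 0.5, -0.5])
        idata = pm.sample(draws=500, tune=500, chains=2)
        pm.compute_log_likelihood(idata)  # adds the log_likelihood group in place
        pm.compute_log_prior(idata)       # adds the log_prior group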
+ + It requires the substep method that does not wrap the functions the step + method uses. Works by setting shared variables before using the step. This eliminates the mapping and unmapping overhead as well as moving fewer variables around. @@ -84,6 +86,8 @@ class ArrayStepShared(BlockedStep): def __init__(self, vars, shared, blocked=True, rng: RandomGenerator = None): """ + Create the ArrayStepShared object. + Parameters ---------- vars: list of sampling value variables @@ -122,14 +126,15 @@ def astep(self, q0: RaveledVars) -> tuple[RaveledVars, StatsType]: class PopulationArrayStepShared(ArrayStepShared): - """Version of ArrayStepShared that allows samplers to access the states - of other chains in the population. + """Version of ArrayStepShared that allows samplers to access the states of other chains in the population. Works by linking a list of Points that is updated as the chains are iterated. """ def __init__(self, vars, shared, blocked=True, rng: RandomGenerator = None): """ + Create the PopulationArrayStepShared object. + Parameters ---------- vars: list of sampling value variables diff --git a/pymc/step_methods/compound.py b/pymc/step_methods/compound.py index 87dd30420..253e0bd04 100644 --- a/pymc/step_methods/compound.py +++ b/pymc/step_methods/compound.py @@ -13,7 +13,7 @@ # limitations under the License. """ -Created on Mar 7, 2011 +Created on Mar 7, 2011. @author: johnsalvatier """ @@ -40,6 +40,7 @@ @unique class Competence(IntEnum): """Enum for characterizing competence classes of step methods. + Values include: 0: INCOMPATIBLE 1: COMPATIBLE @@ -58,7 +59,7 @@ def infer_warn_stats_info( sds: dict[str, tuple[StatDtype, StatShape]], stepname: str, ) -> tuple[list[dict[str, StatDtype]], dict[str, tuple[StatDtype, StatShape]]]: - """Helper function to get `stats_dtypes` and `stats_dtypes_shapes` from either of them.""" + """Get `stats_dtypes` and `stats_dtypes_shapes` from either of them.""" # Avoid side-effects on the original lists/dicts stats_dtypes = [d.copy() for d in stats_dtypes] sds = sds.copy() @@ -213,7 +214,7 @@ def flat_statname(sampler_idx: int, sname: str) -> str: def get_stats_dtypes_shapes_from_steps( steps: Iterable[BlockedStep], ) -> dict[str, tuple[StatDtype, StatShape]]: - """Combines stats dtype shape dictionaries from multiple step methods. + """Combine stats dtype shape dictionaries from multiple step methods. In the resulting stats dict, each sampler stat is prefixed by `sampler_#__`. """ @@ -233,8 +234,7 @@ def __init__(self, methods: list[StepMethodState]): class CompoundStep(WithSamplingState): - """Step method composed of a list of several other step - methods applied in sequence.""" + """Step method composed of a list of several other step methods applied in sequence.""" _state_class = CompoundStepState diff --git a/pymc/step_methods/hmc/__init__.py b/pymc/step_methods/hmc/__init__.py index c6f0d2b8b..8ec9f91ac 100644 --- a/pymc/step_methods/hmc/__init__.py +++ b/pymc/step_methods/hmc/__init__.py @@ -12,5 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+"""Hamiltonian Monte Carlo.""" + from pymc.step_methods.hmc.hmc import HamiltonianMC from pymc.step_methods.hmc.nuts import NUTS diff --git a/pymc/step_methods/hmc/nuts.py b/pymc/step_methods/hmc/nuts.py index 9bcde9510..fb816954b 100644 --- a/pymc/step_methods/hmc/nuts.py +++ b/pymc/step_methods/hmc/nuts.py @@ -225,7 +225,6 @@ def _hamiltonian_step(self, start, p0, step_size): @staticmethod def competence(var, has_grad): """Check how appropriate this class is for sampling a random variable.""" - if var.dtype in continuous_types and has_grad: return Competence.PREFERRED return Competence.INCOMPATIBLE diff --git a/pymc/step_methods/hmc/quadpotential.py b/pymc/step_methods/hmc/quadpotential.py index 05da188f9..53185bbb8 100644 --- a/pymc/step_methods/hmc/quadpotential.py +++ b/pymc/step_methods/hmc/quadpotential.py @@ -842,7 +842,7 @@ class WeightedCovarianceState(DataClassState): class _WeightedCovariance(WithSamplingState): - """Online algorithm for computing mean and covariance + """Online algorithm for computing mean and covariance. This implements the `Welford's algorithm `_ based diff --git a/pymc/step_methods/metropolis.py b/pymc/step_methods/metropolis.py index 21fb6c83e..15b3a3b2b 100644 --- a/pymc/step_methods/metropolis.py +++ b/pymc/step_methods/metropolis.py @@ -134,7 +134,7 @@ class MetropolisState(StepMethodState): class Metropolis(ArrayStepShared): - """Metropolis-Hastings sampling step""" + """Metropolis-Hastings sampling step.""" name = "metropolis" @@ -161,7 +161,7 @@ def __init__( rng=None, **kwargs, ): - """Create an instance of a Metropolis stepper + """Create an instance of a Metropolis stepper. Parameters ---------- @@ -187,7 +187,6 @@ def __init__( :py:class:`~numpy.random.Generator` object. Refer to :py:func:`pymc.util.get_random_generator` for more information. """ - model = pm.modelcontext(model) initial_values = model.initial_point() @@ -255,7 +254,7 @@ def __init__( super().__init__(vars, shared, rng=rng) def reset_tuning(self): - """Resets the tuned sampler parameters to their initial values.""" + """Reset the tuned sampler parameters to their initial values.""" for attr, initial_value in self._untuned_settings.items(): setattr(self, attr, initial_value) self.accepted_sum[:] = 0 @@ -324,8 +323,9 @@ def competence(var, has_grad): def tune(scale, acc_rate): """ - Tunes the scaling parameter for the proposal distribution - according to the acceptance rate over the last tune_interval: + Tune the scaling parameter for the proposal distribution. + + Uses the acceptance rate over the last tune_interval. Rate Variance adaptation ---- ------------------- @@ -381,7 +381,7 @@ class BinaryMetropolisState(StepMethodState): class BinaryMetropolis(ArrayStep): - """Metropolis-Hastings optimized for binary variables + """Metropolis-Hastings optimized for binary variables. Parameters ---------- @@ -457,10 +457,7 @@ def astep(self, apoint: RaveledVars, *args) -> tuple[RaveledVars, StatsType]: @staticmethod def competence(var): - """ - BinaryMetropolis is only suitable for binary (bool) - and Categorical variables with k=1. - """ + """BinaryMetropolis is only suitable for binary (bool) and Categorical variables with k=1.""" distribution = getattr(var.owner, "op", None) if isinstance(distribution, BernoulliRV): @@ -490,7 +487,7 @@ class BinaryGibbsMetropolisState(StepMethodState): class BinaryGibbsMetropolis(ArrayStep): - """A Metropolis-within-Gibbs step method optimized for binary variables + """A Metropolis-within-Gibbs step method optimized for binary variables. 
Parameters ---------- @@ -579,10 +576,7 @@ def astep(self, apoint: RaveledVars, *args) -> tuple[RaveledVars, StatsType]: @staticmethod def competence(var): - """ - BinaryMetropolis is only suitable for Bernoulli - and Categorical variables with k=2. - """ + """BinaryMetropolis is only suitable for Bernoulli and Categorical variables with k=2.""" distribution = getattr(var.owner, "op", None) if isinstance(distribution, BernoulliRV): @@ -756,10 +750,7 @@ def metropolis_proportional(self, q, logp, logp_curr, dim, k): @staticmethod def competence(var): - """ - CategoricalGibbsMetropolis is only suitable for Bernoulli and - Categorical variables. - """ + """CategoricalGibbsMetropolis is only suitable for Bernoulli and Categorical variables.""" distribution = getattr(var.owner, "op", None) if isinstance(distribution, CategoricalRV): @@ -1077,7 +1068,7 @@ def __init__( super().__init__(vars, shared, rng=rng) def reset_tuning(self): - """Resets the tuned sampler parameters and history to their initial values.""" + """Reset the tuned sampler parameters and history to their initial values.""" # history can't be reset via the _untuned_settings dict because it's a list self._history = [] for attr, initial_value in self._untuned_settings.items(): @@ -1136,8 +1127,9 @@ def astep(self, q0: RaveledVars) -> tuple[RaveledVars, StatsType]: return RaveledVars(q_new, point_map_info), [stats] def stop_tuning(self): - """At the end of the tuning phase, this method removes the first x% of the history - so future proposals are not informed by unconverged tuning iterations. + """Remove the first x% of the history at the end of the tuning phase. + + This is so future proposals are not informed by unconverged tuning iterations. """ it = len(self._history) n_drop = int(self.tune_drop_fraction * it) diff --git a/pymc/testing.py b/pymc/testing.py index 7a43c6376..943e2355e 100644 --- a/pymc/testing.py +++ b/pymc/testing.py @@ -68,7 +68,7 @@ def product(domains, n_samples=-1): must be "domain-like", as in, have a `.vals` property n_samples: int, maximum samples to return. -1 to return whole product - Returns: + Returns ------- list of the cartesian product of the domains """ @@ -114,6 +114,7 @@ def __init__(self, vals, dtype=pytensor.config.floatX, edges=None, shape=None): self.dtype = dtype def __add__(self, other): + """Add two domains.""" return Domain( [v + other for v in self.vals], self.dtype, @@ -122,6 +123,7 @@ def __add__(self, other): ) def __mul__(self, other): + """Multiply two domains.""" try: return Domain( [v * other for v in self.vals], @@ -138,6 +140,7 @@ def __mul__(self, other): ) def __neg__(self): + """Negate one domain.""" return Domain([-v for v in self.vals], self.dtype, (-self.lower, -self.upper), self.shape) @@ -223,7 +226,7 @@ def RandomPdMatrix(n): def select_by_precision(float64, float32): - """Helper function to choose reasonable decimal cutoffs for different floatX modes.""" + """Choose reasonable decimal cutoffs for different floatX modes.""" decimal = float64 if pytensor.config.floatX == "float64" else float32 return decimal @@ -311,10 +314,8 @@ def check_logp( skip_paramdomain_outside_edge_test: bool = False, ) -> None: """ - Generic test for PyMC logp methods + Test PyMC logp and equivalent scipy logpmf/logpdf methods give similar results for valid values and parameters inside the supported edges. - Test PyMC logp and equivalent scipy logpmf/logpdf methods give similar - results for valid values and parameters inside the supported edges. 
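As an illustration of the `check_logp` helper described above (the distribution, domains, and reference logpdf are placeholders, not part of this patch):

    import numpy as np
    import scipy.stats as st
    import pymc as pm
    from pymc.testing import Domain, check_logp

    Rplus = Domain([0, 0.5, 1, 2.5, 25, np.inf])  # edge values are excluded by default
    check_logp(
        pm.Exponential,
        Rplus,              # domain of the value
        {"lam": Rplus},     # domain of each parameter
        lambda value, lam: st.expon.logpdf(value, scale=1 / lam),
    )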
Edges are excluded by default, but can be artificially included by creating a domain with repeated values (e.g., `Domain([0, 0, .5, 1, 1]`) @@ -421,7 +422,7 @@ def check_logcdf( skip_paramdomain_outside_edge_test: bool = False, ) -> None: """ - Generic test for PyMC logcdf methods + Test PyMC logcdf and equivalent scipy logcdf methods give similar results for valid values and parameters inside the supported edges. The following tests are performed by default: 1. Test PyMC logcdf and equivalent scipy logcdf methods give similar @@ -536,7 +537,7 @@ def check_icdf( n_samples: int = 100, ) -> None: """ - Generic test for PyMC icdf methods + Test PyMC icdf and equivalent scipy icdf methods give similar results for valid values and parameters inside the supported edges. The following tests are performed by default: 1. Test PyMC icdf and equivalent scipy icdf (ppf) methods give similar @@ -633,9 +634,7 @@ def check_selfconsistency_discrete_logcdf( decimal: int | None = None, n_samples: int = 100, ) -> None: - """ - Check that logcdf of discrete distributions matches sum of logps up to value. - """ + """Check that logcdf of discrete distributions matches sum of logps up to value.""" if decimal is None: decimal = select_by_precision(float64=6, float32=3) @@ -796,8 +795,9 @@ def discrete_random_tester( class BaseTestDistributionRandom: """ - Base class for tests that new RandomVariables are correctly - implemented, and that the mapping of parameters between the PyMC + Base class for tests that new RandomVariables are correctly implemented. + + Also checks that the mapping of parameters between the PyMC Distribution and the respective RandomVariable is correct. Three default tests are provided which check: @@ -979,7 +979,6 @@ def seeded_numpy_distribution_builder(dist_name: str) -> Callable: def assert_no_rvs(vars: Sequence[Variable]) -> None: """Assert that there are no `MeasurableOp` nodes in a graph.""" - rvs = rvs_in_graph(vars) if rvs: raise AssertionError(f"RV found in graph: {rvs}") diff --git a/pymc/tuning/__init__.py b/pymc/tuning/__init__.py index a00dd3fee..f2920849b 100644 --- a/pymc/tuning/__init__.py +++ b/pymc/tuning/__init__.py @@ -12,5 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +"""Tuning phase.""" + from pymc.tuning.scaling import find_hessian, guess_scaling, trace_cov from pymc.tuning.starting import find_MAP diff --git a/pymc/tuning/scaling.py b/pymc/tuning/scaling.py index 08d267adb..56e1fb33b 100644 --- a/pymc/tuning/scaling.py +++ b/pymc/tuning/scaling.py @@ -26,7 +26,7 @@ def fixed_hessian(point, model=None): """ - Returns a fixed Hessian for any chain location. + Return a fixed Hessian for any chain location. Parameters ---------- @@ -35,7 +35,6 @@ def fixed_hessian(point, model=None): vars: list Variables for which Hessian is to be calculated. """ - model = modelcontext(model) point = Point(point, model=model) @@ -45,7 +44,7 @@ def fixed_hessian(point, model=None): def find_hessian(point, vars=None, model=None, negate_output=True): """ - Returns Hessian of logp at the point passed. + Return Hessian of logp at the point passed. Parameters ---------- @@ -61,7 +60,7 @@ def find_hessian(point, vars=None, model=None, negate_output=True): def find_hessian_diag(point, vars=None, model=None, negate_output=True): """ - Returns Hessian of logp at the point passed. + Return Hessian of logp at the point passed. 
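A sketch of the `find_MAP` / `find_hessian` pair touched above (model and data are illustrative):

    import pymc as pm

    with pm.Model() as m:
        mu = pm.Normal("mu", 0.0, 10.0)
        pm.Normal("obs", mu=mu, sigma=1.0, observed=[0.3, 0.7, 0.1])
        map_point = pm.find_MAP()
        H = pm.find_hessian(map_point, vars=[mu])  # negated Hessian of logp (negate_output=True)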
Parameters ---------- @@ -110,7 +109,7 @@ def eig_recompose(val, vec): def trace_cov(trace, vars=None, model=None): """ - Calculate the flattened covariance matrix using a sample trace + Calculate the flattened covariance matrix using a sample trace. Useful if you want to base your covariance matrix for further sampling on some initial samples. diff --git a/pymc/tuning/starting.py b/pymc/tuning/starting.py index cb8ae010d..326401007 100644 --- a/pymc/tuning/starting.py +++ b/pymc/tuning/starting.py @@ -13,7 +13,7 @@ # limitations under the License. """ -Created on Mar 12, 2011 +Created on Mar 12, 2011. @author: johnsalvatier """ @@ -62,7 +62,7 @@ def find_MAP( seed: int | None = None, **kwargs, ): - """Finds the local maximum a posteriori point given a model. + """Find the local maximum a posteriori point given a model. `find_MAP` should not be used to initialize the NUTS sampler. Simply call ``pymc.sample()`` and it will automatically initialize NUTS in a better diff --git a/pymc/util.py b/pymc/util.py index 7733d41b6..41520587c 100644 --- a/pymc/util.py +++ b/pymc/util.py @@ -71,7 +71,7 @@ def __repr__(self): def withparent(meth): - """Helper wrapper that passes calls to parent's instance""" + """Pass calls to parent's instance.""" def wrapped(self, *args, **kwargs): res = meth(self, *args, **kwargs) @@ -87,9 +87,9 @@ def wrapped(self, *args, **kwargs): class treelist(list): - """A list that passes mutable extending operations used in Model - to parent list instance. - Extending treelist you will also extend its parent + """A list that passes mutable extending operations used in Model to parent list instance. + + Extending treelist you will also extend its parent. """ def __init__(self, iterable=(), parent=None): @@ -113,6 +113,7 @@ def tree_contains(self, item): return list.__contains__(self, item) def __setitem__(self, key, value): + """Set value at index `key` with value `value`.""" raise NotImplementedError( "Method is removed as we are not able to determine appropriate logic for it" ) @@ -121,9 +122,11 @@ def __setitem__(self, key, value): # This is my best guess about what this should do. I might be happier # to kill both of these if they are not used. def __mul__(self, other) -> "treelist": + """Multiplication.""" return cast("treelist", super().__mul__(other)) def __imul__(self, other) -> "treelist": + """Inplace multiplication.""" t0 = len(self) super().__imul__(other) if self.parent is not None: @@ -132,9 +135,9 @@ def __imul__(self, other) -> "treelist": class treedict(dict): - """A dict that passes mutable extending operations used in Model - to parent dict instance. - Extending treedict you will also extend its parent + """A dict that passes mutable extending operations used in Model to parent dict instance. + + Extending treedict you will also extend its parent. """ def __init__(self, iterable=(), parent=None, **kwargs): @@ -160,7 +163,7 @@ def tree_contains(self, item): def get_transformed_name(name, transform): r""" - Consistent way of transforming names + Consistent way of transforming names. Parameters ---------- @@ -179,7 +182,7 @@ def get_transformed_name(name, transform): def is_transformed_name(name): r""" - Quickly check if a name was transformed with `get_transformed_name` + Quickly check if a name was transformed with `get_transformed_name`. Parameters ---------- @@ -196,7 +199,7 @@ def is_transformed_name(name): def get_untransformed_name(name): r""" - Undo transformation in `get_transformed_name`. 
Throws ValueError if name wasn't transformed + Undo transformation in `get_transformed_name`. Throws ValueError if name wasn't transformed. Parameters ---------- @@ -214,7 +217,7 @@ def get_untransformed_name(name): def get_default_varnames(var_iterator, include_transformed): - r"""Helper to extract default varnames from a trace. + r"""Extract default varnames from a trace. Parameters ---------- @@ -264,7 +267,7 @@ def enhanced(*args, **kwargs): def drop_warning_stat(idata: arviz.InferenceData) -> arviz.InferenceData: - """Returns a new ``InferenceData`` object with the "warning" stat removed from sample stats groups. + """Return a new ``InferenceData`` object with the "warning" stat removed from sample stats groups. This function should be applied to an ``InferenceData`` object obtained with ``pm.sample(keep_warning_stat=True)`` before trying to ``.to_netcdf()`` or ``.to_zarr()`` it. @@ -298,7 +301,8 @@ def chains_and_samples(data: xarray.Dataset | arviz.InferenceData) -> tuple[int, def hashable(a=None) -> int: """ - Hashes many kinds of objects, including some that are unhashable through the builtin `hash` function. + Hash many kinds of objects, including some that are unhashable through the builtin `hash` function. + Lists and tuples are hashed based on their elements. """ if isinstance(a, dict): @@ -334,25 +338,31 @@ def __init__(self, obj): self.obj = obj def __hash__(self): + """Return a hash of the object.""" return hashable(self.obj) def __eq__(self, other): + """Compare this object with `other`.""" return self.obj == other def __repr__(self): + """Return a string representation of the object.""" return f"{type(self).__name__}({self.obj})" class WithMemoization: def __hash__(self): + """Return a hash of the object.""" return hash(id(self)) def __getstate__(self): + """Return an object to pickle.""" state = self.__dict__.copy() state.pop("_cache", None) return state def __setstate__(self, state): + """Set the object from a pickled object.""" self.__dict__.update(state) @@ -369,7 +379,7 @@ def cf(self): def check_dist_not_registered(dist, model=None): - """Check that a dist is not registered in the model already""" + """Check that a dist is not registered in the model already.""" from pymc.model import modelcontext try: @@ -386,8 +396,10 @@ def check_dist_not_registered(dist, model=None): def point_wrapper(core_function): - """Wrap an pytensor compiled function to be able to ingest point dictionaries whilst - ignoring the keys that are not valid inputs to the core function. + """ + Wrap a pytensor compiled function to ingest point dictionaries. + + It ignores the keys that are not valid inputs to the core function. """ ins = [i.name for i in core_function.maker.fgraph.inputs if not isinstance(i, SharedVariable)] @@ -459,7 +471,7 @@ def _get_unique_seeds_per_chain(integers_fn): def get_value_vars_from_user_vars(vars: Variable | Sequence[Variable], model) -> list[Variable]: - """Converts user "vars" input into value variables. + """Convert user "vars" input into value variables. More often than not, users will pass random variables, and we will extract the respective value variables, but we also allow for the input to already be value @@ -531,7 +543,7 @@ def makeiter(a): class CustomProgress(Progress): - """A child of Progress that allows to disable progress bars and its container + """A child of Progress that allows to disable progress bars and its container. The implementation simply checks an `is_enabled` flag and generates the progress bar only if it's `True`. 
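The name-transformation helpers above follow a simple naming convention; a sketch:

    import pymc as pm
    from pymc.util import (
        get_transformed_name,
        get_untransformed_name,
        is_transformed_name,
    )

    name = get_transformed_name("sigma", pm.distributions.transforms.log)
    print(name)  # "sigma_log__"
    assert is_transformed_name(name)
    assert get_untransformed_name(name) == "sigma"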
@@ -543,11 +555,13 @@ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) def __enter__(self): + """Enter the context manager.""" if self.is_enabled: self.start() return self def __exit__(self, exc_type, exc_val, exc_tb): + """Exit the context manager.""" if self.is_enabled: super().__exit__(exc_type, exc_val, exc_tb) diff --git a/pymc/variational/__init__.py b/pymc/variational/__init__.py index 0ba558f58..785fb11cb 100644 --- a/pymc/variational/__init__.py +++ b/pymc/variational/__init__.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +"""Variational Monte Carlo.""" + # commonly used from pymc.variational import ( approximations, diff --git a/pymc/variational/approximations.py b/pymc/variational/approximations.py index 1f42e5528..61940418b 100644 --- a/pymc/variational/approximations.py +++ b/pymc/variational/approximations.py @@ -40,10 +40,12 @@ @Group.register class MeanFieldGroup(Group): - R"""Mean Field approximation to the posterior where spherical Gaussian family - is fitted to minimize KL divergence from True posterior. It is assumed - that latent space variables are uncorrelated that is the main drawback - of the method + """Mean Field approximation to the posterior. + + Spherical Gaussian family is fitted to minimize KL divergence from posterior. + + It is assumed that latent space variables are uncorrelated that is the main + drawback of the method. """ __param_spec__ = {"mu": ("d",), "rho": ("d",)} @@ -116,10 +118,12 @@ def symbolic_logq_not_scaled(self): @Group.register class FullRankGroup(Group): - """Full Rank approximation to the posterior where Multivariate Gaussian family - is fitted to minimize KL divergence from True posterior. In contrast to - MeanField approach correlations between variables are taken in account. The - main drawback of the method is computational cost. + """Full Rank approximation to the posterior. + + Multivariate Gaussian family is fitted to minimize KL divergence from posterior. + + In contrast to MeanField approach, correlations between variables are taken + into account. The main drawback of the method is its computational cost. """ __param_spec__ = {"mu": ("d",), "L_tril": ("int(d * (d + 1) / 2)",)} @@ -188,8 +192,9 @@ def symbolic_random(self): @Group.register class EmpiricalGroup(Group): - """Builds Approximation instance from a given trace, - it has the same interface as variational approximation + """Builds Approximation instance from a given trace. + + It has the same interface as variational approximation. """ has_logq = False @@ -330,7 +335,7 @@ def sample_approx(approx, draws=100, include_transformed=True): # single group shortcuts exported to user class SingleGroupApproximation(Approximation): - """Base class for Single Group Approximation""" + """Base class for Single Group Approximation.""" _group_class: type | None = None @@ -372,7 +377,7 @@ def __init__(self, trace=None, size=None, **kwargs): def evaluate_over_trace(self, node): R""" - Allows to statically evaluate any symbolic expression over the trace. + Allow to statically evaluate any symbolic expression over the trace. 
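The MeanField and FullRank groups above are normally reached through `pm.fit`; a sketch (model and iteration counts are illustrative):

    import pymc as pm

    with pm.Model() as m:
        mu = pm.Normal("mu", 0.0, 1.0)
        pm.Normal("obs", mu=mu, sigma=1.0, observed=[0.1, 0.2, -0.4])
        # method="advi" uses MeanFieldGroup; "fullrank_advi" uses FullRankGroup
        approx = pm.fit(n=10_000, method="fullrank_advi")

    idata = approx.sample(1000)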
Parameters ---------- diff --git a/pymc/variational/callbacks.py b/pymc/variational/callbacks.py index 820e9d7b8..2fe4aa7f1 100644 --- a/pymc/variational/callbacks.py +++ b/pymc/variational/callbacks.py @@ -43,7 +43,7 @@ def absolute(current: np.ndarray, prev: np.ndarray) -> np.ndarray: class CheckParametersConvergence(Callback): - """Convergence stopping check + """Convergence stopping check. Parameters ---------- @@ -93,7 +93,7 @@ def flatten_shared(shared_list): class Tracker(Callback): """ - Helper class to record arbitrary stats during VI + Helper class to record arbitrary stats during VI. It is possible to pass a function that takes no arguments If call fails then (approx, hist, i) are passed @@ -149,6 +149,7 @@ def clear(self): self.hist = collections.defaultdict(list) def __getitem__(self, item): + """Get the element at index `item`.""" return self.hist[item] __call__ = record diff --git a/pymc/variational/inference.py b/pymc/variational/inference.py index 9a400bb1e..3dcb59b59 100644 --- a/pymc/variational/inference.py +++ b/pymc/variational/inference.py @@ -45,7 +45,7 @@ class Inference: - r"""**Base class for Variational Inference** + r"""**Base class for Variational Inference**. Communicates Operator, Approximation and Test Function to build Objective Function @@ -101,7 +101,7 @@ def fit( progressbar_theme=default_progress_theme, **kwargs, ): - """Perform Operator Variational Inference + """Perform Operator Variational Inference. Parameters ---------- @@ -206,7 +206,7 @@ def _iterate_without_loss(self, s, n, step_func, progressbar, progressbar_theme, def _iterate_with_loss(self, s, n, step_func, progressbar, progressbar_theme, callbacks): def _infmean(input_array): - """Return the mean of the finite values of the array""" + """Return the mean of the finite values of the array.""" input_array = input_array[np.isfinite(input_array)].astype("float64") if len(input_array) == 0: return np.nan @@ -285,7 +285,7 @@ def _infmean(input_array): return State(i + s, step=step_func, callbacks=callbacks, score=True) def refine(self, n, progressbar=True, progressbar_theme=default_progress_theme): - """Refine the solution using the last compiled step function""" + """Refine the solution using the last compiled step function.""" if self.state is None: raise TypeError("Need to call `.fit` first") i, step, callbacks, score = self.state @@ -299,7 +299,7 @@ def refine(self, n, progressbar=True, progressbar_theme=default_progress_theme): class KLqp(Inference): - r"""**Kullback Leibler Divergence Inference** + r"""**Kullback Leibler Divergence Inference**. General approach to fit Approximations that define :math:`logq` by maximizing ELBO (Evidence Lower Bound). In some cases @@ -328,7 +328,7 @@ def __init__(self, approx, beta=1.0): class ADVI(KLqp): - r"""**Automatic Differentiation Variational Inference (ADVI)** + r"""**Automatic Differentiation Variational Inference (ADVI)**. This class implements the meanfield ADVI, where the variational posterior distribution is assumed to be spherical Gaussian without @@ -472,7 +472,7 @@ def __init__(self, *args, **kwargs): class FullRankADVI(KLqp): - r"""**Full Rank Automatic Differentiation Variational Inference (ADVI)** + r"""**Full Rank Automatic Differentiation Variational Inference (ADVI)**. Parameters ---------- @@ -501,7 +501,7 @@ def __init__(self, *args, **kwargs): class ImplicitGradient(Inference): - """**Implicit Gradient for Variational Inference** + """**Implicit Gradient for Variational Inference**. 
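The callbacks touched above plug into `Inference.fit`; a sketch using the `Tracker`/`CheckParametersConvergence` pattern (the model is illustrative):

    import pymc as pm
    from pymc.variational.callbacks import CheckParametersConvergence, Tracker

    with pm.Model() as m:
        mu = pm.Normal("mu", 0.0, 1.0)
        pm.Normal("obs", mu=mu, sigma=1.0, observed=[0.3, -0.2, 0.5])
        advi = pm.ADVI()
        tracker = Tracker(
            mean=advi.approx.mean.eval,  # record the approximation mean each iteration
            std=advi.approx.std.eval,
        )
        approx = advi.fit(
            10_000,
            callbacks=[tracker, CheckParametersConvergence(diff="absolute")],
        )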
**not suggested to use** @@ -517,7 +517,7 @@ def __init__(self, approx, estimator=KSD, kernel=test_functions.rbf, **kwargs): class SVGD(ImplicitGradient): - r"""**Stein Variational Gradient Descent** + r"""**Stein Variational Gradient Descent**. This inference is based on Kernelized Stein Discrepancy it's main idea is to move initial noisy particles so that @@ -585,7 +585,7 @@ def __init__( class ASVGD(ImplicitGradient): - r"""**Amortized Stein Variational Gradient Descent** + r"""**Amortized Stein Variational Gradient Descent**. **not suggested to use** @@ -679,7 +679,7 @@ def fit( inf_kwargs=None, **kwargs, ): - r"""Handy shortcut for using inference methods in functional way + r"""Handy shortcut for using inference methods in a functional way. Parameters ---------- diff --git a/pymc/variational/minibatch_rv.py b/pymc/variational/minibatch_rv.py index be71a358c..0437f6663 100644 --- a/pymc/variational/minibatch_rv.py +++ b/pymc/variational/minibatch_rv.py @@ -25,7 +25,7 @@ class MinibatchRandomVariable(MeasurableOp, Op): - """RV whose logprob should be rescaled to match total_size""" + """RV whose logprob should be rescaled to match total_size.""" __props__ = () view_map = {0: [0]} @@ -81,8 +81,7 @@ def create_minibatch_rv( def get_scaling(total_size: Sequence[Variable], shape: TensorVariable) -> TensorVariable: - """Gets scaling constant for logp.""" - + """Get scaling constant for logp.""" # mypy doesn't understand we can convert a shape TensorVariable into a tuple shape = tuple(shape) # type: ignore diff --git a/pymc/variational/operators.py b/pymc/variational/operators.py index f6ef09572..fc1226be1 100644 --- a/pymc/variational/operators.py +++ b/pymc/variational/operators.py @@ -32,7 +32,7 @@ class KL(Operator): - R"""**Operator based on Kullback Leibler Divergence** + R"""**Operator based on Kullback Leibler Divergence**. This operator constructs Evidence Lower Bound (ELBO) objective @@ -67,7 +67,7 @@ def apply(self, f): class KSDObjective(ObjectiveFunction): - R"""Helper class for construction loss and updates for variational inference + R"""Helper class for constructing the loss and updates for variational inference. Parameters ---------- @@ -104,7 +104,7 @@ def __call__(self, nmc, **kwargs) -> list[Variable]: class KSD(Operator): - R"""**Operator based on Kernelized Stein Discrepancy** + R"""**Operator based on Kernelized Stein Discrepancy**. Input: A target distribution with density function :math:`p(x)` and a set of initial particles :math:`\{x^0_i\}^n_{i=1}` diff --git a/pymc/variational/opvi.py b/pymc/variational/opvi.py index 898b6afc5..b07b9ded8 100644 --- a/pymc/variational/opvi.py +++ b/pymc/variational/opvi.py @@ -12,7 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -R""" +R"""Operational Variational Inference. + Variational inference is a great approach for doing really complex, often intractable Bayesian inference in approximate form. Common methods (e.g.
ADVI) lack from complexity so that approximate posterior does not @@ -91,27 +92,27 @@ class VariationalInferenceError(Exception): - """Exception for VI specific cases""" + """Exception for VI specific cases.""" class NotImplementedInference(VariationalInferenceError, NotImplementedError): - """Marking non functional parts of code""" + """Marking non functional parts of code.""" class ExplicitInferenceError(VariationalInferenceError, TypeError): - """Exception for bad explicit inference""" + """Exception for bad explicit inference.""" class AEVBInferenceError(VariationalInferenceError, TypeError): - """Exception for bad aevb inference""" + """Exception for bad aevb inference.""" class ParametrizationError(VariationalInferenceError, ValueError): - """Error raised in case of bad parametrization""" + """Error raised in case of bad parametrization.""" class GroupError(VariationalInferenceError, TypeError): - """Error related to VI groups""" + """Error related to VI groups.""" def _known_scan_ignored_inputs(terms): @@ -142,8 +143,7 @@ def inner(*args, **kwargs): def node_property(f): - """A shortcut for wrapping method to accessible tensor""" - + """Wrap a method into an accessible tensor property.""" if isinstance(f, str): def wrapper(fn): @@ -180,7 +180,7 @@ def try_to_set_test_value(node_in, node_out, s): class ObjectiveUpdates(pytensor.OrderedUpdates): - """OrderedUpdates extension for storing loss""" + """OrderedUpdates extension for storing loss.""" loss = None @@ -190,7 +190,7 @@ def _warn_not_used(smth, where): class ObjectiveFunction: - """Helper class for construction loss and updates for variational inference + """Helper class for constructing the loss and updates for variational inference. Parameters ---------- @@ -220,8 +220,7 @@ def updates( more_replacements=None, total_grad_norm_constraint=None, ): - """Calculate gradients for objective function, test function and then - constructs updates for optimization step + """Construct updates for the optimization step after calculating gradients. Parameters ---------- @@ -398,7 +397,7 @@ def step_function( def score_function( self, sc_n_mc=None, more_replacements=None, fn_kwargs=None ): # pragma: no cover - R"""Compile scoring function that operates which takes no inputs and returns Loss + R"""Compile a scoring function that takes no inputs and returns the loss. Parameters ---------- @@ -435,7 +434,7 @@ def __call__(self, nmc, **kwargs): class Operator: - R"""**Base class for Operator** + R"""**Base class for Operator**. Parameters ---------- @@ -474,7 +473,7 @@ def __init__(self, approx): model = property(lambda self: self.approx.model) def apply(self, f): # pragma: no cover - R"""Operator itself + R"""Operator itself. .. math:: @@ -510,12 +509,12 @@ def __call__(self, f=None): return self.objective_class(self, f) def __str__(self): # pragma: no cover + """Return a string representation of the object.""" return f"{self.__class__.__name__}[{self.approx.__class__.__name__}]" def collect_shared_to_list(params): - """Helper function for getting a list from - usable representation of parameters + """Get a list from a usable representation of parameters. Parameters ---------- @@ -562,7 +561,7 @@ def from_function(cls, f): class Group(WithMemoization): - R"""**Base class for grouping variables in VI** + R"""**Base class for grouping variables in VI**. Grouped Approximation is used for modelling mutual dependencies for a specified group of variables. Base for local and global group.
@@ -777,8 +776,9 @@ def get_param_spec_for(cls, **kwargs): return res def _check_user_params(self, **kwargs): - R"""*Dev* - checks user params, allocates them if they are correct, returns True. - If they are not present, returns False + R"""*Dev* - check user params, if correct allocate them and return True. + + If they are not present, returns False. Parameters ---------- @@ -808,7 +808,7 @@ def _check_user_params(self, **kwargs): return True def _initial_type(self, name): - R"""*Dev* - initial type with given name. The correct type depends on `self.batched` + R"""*Dev* - initial type with given name. The correct type depends on `self.batched`. Parameters ---------- @@ -822,7 +822,7 @@ def _initial_type(self, name): return pt.matrix(name) def _input_type(self, name): - R"""*Dev* - input type with given name. The correct type depends on `self.batched` + R"""*Dev* - input type with given name. The correct type depends on `self.batched`. Parameters ---------- @@ -837,6 +837,7 @@ def _input_type(self, name): @pytensor.config.change_flags(compute_test_value="off") def __init_group__(self, group): + """Initialize the group.""" if not group: raise GroupError("Got empty group") if self.group is None: @@ -875,7 +876,7 @@ def __init_group__(self, group): start_idx += size def _finalize_init(self): - """*Dev* - clean up after init""" + """*Dev* - clean up after init.""" del self._kwargs @property @@ -895,7 +896,7 @@ def params(self): return collect_shared_to_list(self.shared_params) def _new_initial_shape(self, size, dim, more_replacements=None): - """*Dev* - correctly proceeds sampling with variable batch size + """*Dev* - correctly proceeds sampling with variable batch size. Parameters ---------- @@ -921,7 +922,7 @@ def ddim(self): return sum(s.stop - s.start for _, s, _, _ in self.ordering.values()) def _new_initial(self, size, deterministic, more_replacements=None): - """*Dev* - allocates new initial random generator + """*Dev* - allocates new initial random generator. Parameters ---------- @@ -967,8 +968,7 @@ def _new_initial(self, size, deterministic, more_replacements=None): @node_property def symbolic_random(self): - """*Dev* - abstract node that takes `self.symbolic_initial` and creates - approximate posterior that is parametrized with `self.params_dict`. + """*Dev* - abstract node that takes `self.symbolic_initial` and creates approximate posterior that is parametrized with `self.params_dict`. Implementation should take in account `self.batched`. If `self.batched` is `True`, then `self.symbolic_initial` is 3d tensor, else 2d @@ -993,8 +993,7 @@ def set_size_and_deterministic( def set_size_and_deterministic( self, node: Variable | list[Variable], s, d: bool, more_replacements: dict | None = None ) -> Variable | list[Variable]: - """*Dev* - after node is sampled via :func:`symbolic_sample_over_posterior` or - :func:`symbolic_single_sample` new random generator can be allocated and applied to node + """*Dev* - after node is sampled via :func:`symbolic_sample_over_posterior` or :func:`symbolic_single_sample` new random generator can be allocated and applied to node. 
Parameters ---------- @@ -1011,7 +1010,6 @@ def set_size_and_deterministic( ------- :class:`Variable` or list with applied replacements, ready to use """ - flat2rand = self.make_size_and_deterministic_replacements(s, d, more_replacements) node_out = graph_replace(node, flat2rand, strict=False) assert not ( @@ -1022,12 +1020,13 @@ def set_size_and_deterministic( return node_out def to_flat_input(self, node): - """*Dev* - replace vars with flattened view stored in `self.inputs`""" + """*Dev* - replace vars with flattened view stored in `self.inputs`.""" return graph_replace(node, self.replacements, strict=False) def symbolic_sample_over_posterior(self, node): - """*Dev* - performs sampling of node applying independent samples from posterior each time. - Note that it is done symbolically and this node needs :func:`set_size_and_deterministic` call + """*Dev* - perform sampling of node applying independent samples from posterior each time. + + Note that it is done symbolically and this node needs :func:`set_size_and_deterministic` call. """ node = self.to_flat_input(node) random = self.symbolic_random.astype(self.symbolic_initial.dtype) @@ -1043,17 +1042,17 @@ def sample(post, *_): return nodes def symbolic_single_sample(self, node): - """*Dev* - performs sampling of node applying single sample from posterior. + """*Dev* - perform sampling of node applying single sample from posterior. + Note that it is done symbolically and this node needs - :func:`set_size_and_deterministic` call with `size=1` + :func:`set_size_and_deterministic` call with `size=1`. """ node = self.to_flat_input(node) random = self.symbolic_random.astype(self.symbolic_initial.dtype) return graph_replace(node, {self.input: random[0]}, strict=False) def make_size_and_deterministic_replacements(self, s, d, more_replacements=None): - """*Dev* - creates correct replacements for initial depending on - sample size and deterministic flag + """*Dev* - create correct replacements for initial depending on sample size and deterministic flag. 
Parameters ---------- @@ -1083,7 +1082,7 @@ def make_size_and_deterministic_replacements(self, s, d, more_replacements=None) @node_property def symbolic_normalizing_constant(self): - """*Dev* - normalizing constant for `self.logq`, scales it to `minibatch_size` instead of `total_size`""" + """*Dev* - normalizing constant for `self.logq`, scales it to `minibatch_size` instead of `total_size`.""" t = self.to_flat_input( pt.max( [ @@ -1099,28 +1098,26 @@ @node_property def symbolic_logq_not_scaled(self): - """*Dev* - symbolically computed logq for `self.symbolic_random` - computations can be more efficient since all is known beforehand including - `self.symbolic_random` - """ + """*Dev* - symbolically computed logq for `self.symbolic_random`; computations can be more efficient since all is known beforehand, including `self.symbolic_random`.""" raise NotImplementedError # shape (s,) @node_property def symbolic_logq(self): - """*Dev* - correctly scaled `self.symbolic_logq_not_scaled`""" + """*Dev* - correctly scaled `self.symbolic_logq_not_scaled`.""" return self.symbolic_logq_not_scaled @node_property def logq(self): - """*Dev* - Monte Carlo estimate for group `logQ`""" + """*Dev* - Monte Carlo estimate for group `logQ`.""" return self.symbolic_logq.mean(0) @node_property def logq_norm(self): - """*Dev* - Monte Carlo estimate for group `logQ` normalized""" + """*Dev* - Monte Carlo estimate for group `logQ` normalized.""" return self.logq / self.symbolic_normalizing_constant def __str__(self): + """Return a string representation of the object.""" if self.group is None: shp = "undefined" else: @@ -1129,23 +1126,21 @@ @node_property def std(self) -> pt.TensorVariable: - """Standard deviation of the latent variables as an unstructured 1-dimensional tensor variable""" + """Return the standard deviation of the latent variables as an unstructured 1-dimensional tensor variable.""" raise NotImplementedError() @node_property def cov(self) -> pt.TensorVariable: - """Covariance between the latent variables as an unstructured 2-dimensional tensor variable""" + """Return the covariance between the latent variables as an unstructured 2-dimensional tensor variable.""" raise NotImplementedError() @node_property def mean(self) -> pt.TensorVariable: - """Mean of the latent variables as an unstructured 1-dimensional tensor variable""" + """Return the mean of the latent variables as an unstructured 1-dimensional tensor variable.""" raise NotImplementedError() def var_to_data(self, shared: pt.TensorVariable) -> xarray.Dataset: - """Takes a flat 1-dimensional tensor variable and maps it to an xarray data set based on the information in - `self.ordering`.
- """ + """Take a flat 1-dimensional tensor variable and maps it to an xarray data set based on the information in `self.ordering`.""" # This is somewhat similar to `DictToArrayBijection.rmap`, which doesn't work here since we don't have # `RaveledVars` and need to take the information from `self.ordering` instead shared_nda = shared.eval() @@ -1162,12 +1157,12 @@ def var_to_data(self, shared: pt.TensorVariable) -> xarray.Dataset: @property def mean_data(self) -> xarray.Dataset: - """Mean of the latent variables as an xarray Dataset""" + """Mean of the latent variables as an xarray Dataset.""" return self.var_to_data(self.mean) @property def std_data(self) -> xarray.Dataset: - """Standard deviation of the latent variables as an xarray Dataset""" + """Standard deviation of the latent variables as an xarray Dataset.""" return self.var_to_data(self.std) @@ -1176,7 +1171,7 @@ def std_data(self) -> xarray.Dataset: class Approximation(WithMemoization): - """**Wrapper for grouped approximations** + """**Wrapper for grouped approximations**. Wraps list of groups, creates an Approximation instance that collects sampled variables from all the groups, also collects logQ needed for @@ -1242,7 +1237,7 @@ def collect(self, item): @property def scale_cost_to_minibatch(self): - """*Dev* - Property to control scaling cost to minibatch""" + """*Dev* - Property to control scaling cost to minibatch.""" return bool(self._scale_cost_to_minibatch.get_value()) @scale_cost_to_minibatch.setter @@ -1252,7 +1247,8 @@ def scale_cost_to_minibatch(self, value): @node_property def symbolic_normalizing_constant(self): """*Dev* - normalizing constant for `self.logq`, scales it to `minibatch_size` instead of `total_size`. - Here the effect is controlled by `self.scale_cost_to_minibatch` + + Here the effect is controlled by `self.scale_cost_to_minibatch`. 
""" t = pt.max( self.collect("symbolic_normalizing_constant") @@ -1267,22 +1263,22 @@ def symbolic_normalizing_constant(self): @node_property def symbolic_logq(self): - """*Dev* - collects `symbolic_logq` for all groups""" + """*Dev* - collects `symbolic_logq` for all groups.""" return pt.add(*self.collect("symbolic_logq")) @node_property def logq(self): - """*Dev* - collects `logQ` for all groups""" + """*Dev* - collects `logQ` for all groups.""" return pt.add(*self.collect("logq")) @node_property def logq_norm(self): - """*Dev* - collects `logQ` for all groups and normalizes it""" + """*Dev* - collects `logQ` for all groups and normalizes it.""" return self.logq / self.symbolic_normalizing_constant @node_property def _sized_symbolic_varlogp_and_datalogp(self): - """*Dev* - computes sampled prior term from model via `pytensor.scan`""" + """*Dev* - computes sampled prior term from model via `pytensor.scan`.""" varlogp_s, datalogp_s = self.symbolic_sample_over_posterior( [self.model.varlogp, self.model.datalogp] ) @@ -1290,83 +1286,79 @@ def _sized_symbolic_varlogp_and_datalogp(self): @node_property def sized_symbolic_varlogp(self): - """*Dev* - computes sampled prior term from model via `pytensor.scan`""" + """*Dev* - computes sampled prior term from model via `pytensor.scan`.""" return self._sized_symbolic_varlogp_and_datalogp[0] # shape (s,) @node_property def sized_symbolic_datalogp(self): - """*Dev* - computes sampled data term from model via `pytensor.scan`""" + """*Dev* - computes sampled data term from model via `pytensor.scan`.""" return self._sized_symbolic_varlogp_and_datalogp[1] # shape (s,) @node_property def sized_symbolic_logp(self): - """*Dev* - computes sampled logP from model via `pytensor.scan`""" + """*Dev* - computes sampled logP from model via `pytensor.scan`.""" return self.sized_symbolic_varlogp + self.sized_symbolic_datalogp # shape (s,) @node_property def logp(self): - """*Dev* - computes :math:`E_{q}(logP)` from model via `pytensor.scan` that can be optimized later""" + """*Dev* - computes :math:`E_{q}(logP)` from model via `pytensor.scan` that can be optimized later.""" return self.varlogp + self.datalogp @node_property def varlogp(self): - """*Dev* - computes :math:`E_{q}(prior term)` from model via `pytensor.scan` that can be optimized later""" + """*Dev* - computes :math:`E_{q}(prior term)` from model via `pytensor.scan` that can be optimized later.""" return self.sized_symbolic_varlogp.mean(0) @node_property def datalogp(self): - """*Dev* - computes :math:`E_{q}(data term)` from model via `pytensor.scan` that can be optimized later""" + """*Dev* - computes :math:`E_{q}(data term)` from model via `pytensor.scan` that can be optimized later.""" return self.sized_symbolic_datalogp.mean(0) @node_property def _single_symbolic_varlogp_and_datalogp(self): - """*Dev* - computes sampled prior term from model via `pytensor.scan`""" + """*Dev* - computes sampled prior term from model via `pytensor.scan`.""" varlogp, datalogp = self.symbolic_single_sample([self.model.varlogp, self.model.datalogp]) return varlogp, datalogp @node_property def single_symbolic_varlogp(self): - """*Dev* - for single MC sample estimate of :math:`E_{q}(prior term)` `pytensor.scan` - is not needed and code can be optimized""" + """*Dev* - for single MC sample estimate of :math:`E_{q}(prior term)` `pytensor.scan` is not needed and code can be optimized.""" return self._single_symbolic_varlogp_and_datalogp[0] @node_property def single_symbolic_datalogp(self): - """*Dev* - for single MC sample 
estimate of :math:`E_{q}(data term)` `pytensor.scan` - is not needed and code can be optimized""" + """*Dev* - for single MC sample estimate of :math:`E_{q}(data term)` `pytensor.scan` is not needed and code can be optimized.""" return self._single_symbolic_varlogp_and_datalogp[1] @node_property def single_symbolic_logp(self): - """*Dev* - for single MC sample estimate of :math:`E_{q}(logP)` `pytensor.scan` - is not needed and code can be optimized""" + """*Dev* - for single MC sample estimate of :math:`E_{q}(logP)` `pytensor.scan` is not needed and code can be optimized.""" return self.single_symbolic_datalogp + self.single_symbolic_varlogp @node_property def logp_norm(self): - """*Dev* - normalized :math:`E_{q}(logP)`""" + """*Dev* - normalized :math:`E_{q}(logP)`.""" return self.logp / self.symbolic_normalizing_constant @node_property def varlogp_norm(self): - """*Dev* - normalized :math:`E_{q}(prior term)`""" + """*Dev* - normalized :math:`E_{q}(prior term)`.""" return self.varlogp / self.symbolic_normalizing_constant @node_property def datalogp_norm(self): - """*Dev* - normalized :math:`E_{q}(data term)`""" + """*Dev* - normalized :math:`E_{q}(data term)`.""" return self.datalogp / self.symbolic_normalizing_constant @property def replacements(self): - """*Dev* - all replacements from groups to replace PyMC random variables with approximation""" + """*Dev* - all replacements from groups to replace PyMC random variables with approximation.""" return collections.OrderedDict( itertools.chain.from_iterable(g.replacements.items() for g in self.groups) ) def make_size_and_deterministic_replacements(self, s, d, more_replacements=None): - """*Dev* - creates correct replacements for initial depending on - sample size and deterministic flag + """*Dev* - create correct replacements for initial depending on sample size and deterministic flag. Parameters ---------- @@ -1391,8 +1383,7 @@ def make_size_and_deterministic_replacements(self, s, d, more_replacements=None) @pytensor.config.change_flags(compute_test_value="off") def set_size_and_deterministic(self, node, s, d, more_replacements=None): - """*Dev* - after node is sampled via :func:`symbolic_sample_over_posterior` or - :func:`symbolic_single_sample` new random generator can be allocated and applied to node + """*Dev* - after node is sampled via :func:`symbolic_sample_over_posterior` or :func:`symbolic_single_sample` new random generator can be allocated and applied to node. Parameters ---------- @@ -1419,14 +1410,15 @@ def set_size_and_deterministic(self, node, s, d, more_replacements=None): return node def to_flat_input(self, node, more_replacements=None): - """*Dev* - replace vars with flattened view stored in `self.inputs`""" + """*Dev* - replace vars with flattened view stored in `self.inputs`.""" more_replacements = more_replacements or {} node = graph_replace(node, more_replacements, strict=False) return graph_replace(node, self.replacements, strict=False) def symbolic_sample_over_posterior(self, node, more_replacements=None): - """*Dev* - performs sampling of node applying independent samples from posterior each time. - Note that it is done symbolically and this node needs :func:`set_size_and_deterministic` call + """*Dev* - perform sampling of node applying independent samples from posterior each time. + + Note that it is done symbolically and this node needs :func:`set_size_and_deterministic` call. 
""" node = self.to_flat_input(node) @@ -1440,9 +1432,10 @@ def sample(*post): return nodes def symbolic_single_sample(self, node, more_replacements=None): - """*Dev* - performs sampling of node applying single sample from posterior. + """*Dev* - perform sampling of node applying single sample from posterior. + Note that it is done symbolically and this node needs - :func:`set_size_and_deterministic` call with `size=1` + :func:`set_size_and_deterministic` call with `size=1`. """ node = self.to_flat_input(node, more_replacements=more_replacements) post = [v[0] for v in self.symbolic_randoms] @@ -1450,8 +1443,10 @@ def symbolic_single_sample(self, node, more_replacements=None): return graph_replace(node, dict(zip(inp, post)), strict=False) def get_optimization_replacements(self, s, d): - """*Dev* - optimizations for logP. If sample size is static and equal to 1: - then `pytensor.scan` MC estimate is replaced with single sample without call to `pytensor.scan`. + """*Dev* - optimizations for logP. + + If sample size is static and equal to 1, then `pytensor.scan` MC + estimate is replaced with single sample without call to `pytensor.scan`. """ repl = collections.OrderedDict() # avoid scan if size is constant and equal to one @@ -1462,7 +1457,7 @@ def get_optimization_replacements(self, s, d): @pytensor.config.change_flags(compute_test_value="off") def sample_node(self, node, size=None, deterministic=False, more_replacements=None): - """Samples given node or nodes over shared posterior + """Sample given node or nodes over shared posterior. Parameters ---------- @@ -1497,7 +1492,8 @@ def sample_node(self, node, size=None, deterministic=False, more_replacements=No def rslice(self, name): """*Dev* - vectorized sampling for named random variable without call to `pytensor.scan`. - This node still needs :func:`set_size_and_deterministic` to be evaluated + + This node still needs :func:`set_size_and_deterministic` to be evaluated. """ def vars_names(vs): @@ -1590,6 +1586,7 @@ def symbolic_random(self): return pt.concatenate(self.collect("symbolic_random"), axis=-1) def __str__(self): + """Return a string representation of the object.""" if len(self.groups) < 5: return "Approximation{" + " & ".join(map(str, self.groups)) + "}" else: diff --git a/pymc/variational/test_functions.py b/pymc/variational/test_functions.py index 303c6cc09..26ad06193 100644 --- a/pymc/variational/test_functions.py +++ b/pymc/variational/test_functions.py @@ -21,8 +21,8 @@ class Kernel(TestFunction): - """ - Dummy base class for kernel SVGD in case we implement more + r""" + Dummy base class for kernel SVGD in case we implement more. .. math:: diff --git a/pymc/variational/updates.py b/pymc/variational/updates.py index d919a7e24..656dbd042 100644 --- a/pymc/variational/updates.py +++ b/pymc/variational/updates.py @@ -136,7 +136,7 @@ def get_or_compute_grads(loss_or_grads, params): - """Helper function returning a list of gradients + """Return a list of gradients. Parameters ---------- @@ -185,7 +185,7 @@ def _get_call_kwargs(_locals_): def sgd(loss_or_grads=None, params=None, learning_rate=1e-3): - """Stochastic Gradient Descent (SGD) updates + """Stochastic Gradient Descent (SGD) updates. Generates update expressions of the form: @@ -238,9 +238,9 @@ def sgd(loss_or_grads=None, params=None, learning_rate=1e-3): def apply_momentum(updates, params=None, momentum=0.9): - """Returns a modified update dictionary including momentum + """Return a modified update dictionary including momentum. 
- Generates update expressions of the form: + Generate update expressions of the form: * ``velocity := momentum * velocity + updates[param] - param`` * ``param := param + velocity`` @@ -285,7 +285,7 @@ def apply_momentum(updates, params=None, momentum=0.9): def momentum(loss_or_grads=None, params=None, learning_rate=1e-3, momentum=0.9): - """Stochastic Gradient Descent (SGD) updates with momentum + """Stochastic Gradient Descent (SGD) updates with momentum. Generates update expressions of the form: @@ -345,9 +345,9 @@ def momentum(loss_or_grads=None, params=None, learning_rate=1e-3, momentum=0.9): def apply_nesterov_momentum(updates, params=None, momentum=0.9): - """Returns a modified update dictionary including Nesterov momentum + """Return a modified update dictionary including Nesterov momentum. - Generates update expressions of the form: + Generate update expressions of the form: * ``velocity := momentum * velocity + updates[param] - param`` * ``param := param + momentum * velocity + updates[param] - param`` @@ -398,7 +398,7 @@ def apply_nesterov_momentum(updates, params=None, momentum=0.9): def nesterov_momentum(loss_or_grads=None, params=None, learning_rate=1e-3, momentum=0.9): - """Stochastic Gradient Descent (SGD) updates with Nesterov momentum + """Stochastic Gradient Descent (SGD) updates with Nesterov momentum. Generates update expressions of the form: @@ -463,7 +463,7 @@ def nesterov_momentum(loss_or_grads=None, params=None, learning_rate=1e-3, momen def adagrad(loss_or_grads=None, params=None, learning_rate=1.0, epsilon=1e-6): - """Adagrad updates + r"""Adagrad updates. Scale learning rates by dividing with the square root of accumulated squared gradients. See [1]_ for further description. @@ -540,8 +540,9 @@ def adagrad(loss_or_grads=None, params=None, learning_rate=1.0, epsilon=1e-6): def adagrad_window(loss_or_grads=None, params=None, learning_rate=0.001, epsilon=0.1, n_win=10): - """Returns a function that returns parameter updates. - Instead of accumulated estimate, uses running window + """Return a function that returns parameter updates. + + Instead of accumulated estimate, uses running window. Parameters ---------- @@ -585,7 +586,7 @@ def adagrad_window(loss_or_grads=None, params=None, learning_rate=0.001, epsilon def rmsprop(loss_or_grads=None, params=None, learning_rate=1.0, rho=0.9, epsilon=1e-6): - """RMSProp updates + r"""RMSProp updates. Scale learning rates by dividing with the moving average of the root mean squared (RMS) gradients. See [1]_ for further description. @@ -666,7 +667,7 @@ def rmsprop(loss_or_grads=None, params=None, learning_rate=1.0, rho=0.9, epsilon def adadelta(loss_or_grads=None, params=None, learning_rate=1.0, rho=0.95, epsilon=1e-6): - r"""Adadelta updates + r"""Adadelta updates. Scale learning rates by the ratio of accumulated gradients to accumulated updates, see [1]_ and notes for further description. @@ -772,7 +773,7 @@ def adadelta(loss_or_grads=None, params=None, learning_rate=1.0, rho=0.95, epsil def adam( loss_or_grads=None, params=None, learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1e-8 ): - """Adam updates + """Adam updates. Adam updates implemented as in [1]_. @@ -859,7 +860,7 @@ def adam( def adamax( loss_or_grads=None, params=None, learning_rate=0.002, beta1=0.9, beta2=0.999, epsilon=1e-8 ): - """Adamax updates + """Adamax updates. Adamax updates implemented as in [1]_. This is a variant of the Adam algorithm based on the infinity norm. 
@@ -941,7 +942,7 @@ def adamax( def norm_constraint(tensor_var, max_norm, norm_axes=None, epsilon=1e-7): - """Max weight norm constraints and gradient clipping + """Max weight norm constraints and gradient clipping. This takes a TensorVariable and rescales it so that incoming weight norms are below a specified constraint value. Vectors violating the @@ -1016,7 +1017,7 @@ def norm_constraint(tensor_var, max_norm, norm_axes=None, epsilon=1e-7): def total_norm_constraint(tensor_vars, max_norm, epsilon=1e-7, return_norm=False): - """Rescales a list of tensors based on their combined norm + """Rescales a list of tensors based on their combined norm. If the combined norm of the input tensors exceeds the threshold then all tensors are rescaled such that the combined norm is equal to the threshold. diff --git a/pyproject.toml b/pyproject.toml index 770fd0e04..a5efd4b7b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -44,30 +44,15 @@ ignore = [ "RUF001", # String contains ambiguous character (such as Greek letters) "RUF002", # Docstring contains ambiguous character (such as Greek letters) "RUF012", # Mutable class attributes should be annotated with `typing.ClassVar` - "D100", - "D101", - "D102", - "D103", - "D104", - "D105", - "D107", - "D200", - "D202", - "D203", - "D204", - "D205", - "D209", - "D212", - "D213", - "D301", - "D400", - "D401", - "D403", - "D413", - "D415", - "D417", + "D100", # Missing docstring in public module + "D101", # Missing docstring in public class + "D102", # Missing docstring in public method + "D103", # Missing docstring in public function ] +[tool.ruff.lint.pydocstyle] +convention = "numpy" + [tool.ruff.lint.isort] lines-between-types = 1 diff --git a/scripts/check_all_tests_are_covered.py b/scripts/check_all_tests_are_covered.py index 4d2b13386..23079338d 100644 --- a/scripts/check_all_tests_are_covered.py +++ b/scripts/check_all_tests_are_covered.py @@ -31,7 +31,7 @@ def find_testfiles(): def from_yaml(): - """Determines how often each test file is run per platform and floatX setting. + """Determine how often each test file is run per platform and floatX setting. An exception is raised if tests run multiple times with the same configuration. """ diff --git a/scripts/generate_pip_deps_from_conda.py b/scripts/generate_pip_deps_from_conda.py index 6ad6dac6b..698d54a1d 100755 --- a/scripts/generate_pip_deps_from_conda.py +++ b/scripts/generate_pip_deps_from_conda.py @@ -31,7 +31,7 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. """ -Check requirements-dev.txt has been generated from conda-envs/environment-dev.yml +Check requirements-dev.txt has been generated from conda-envs/environment-dev.yml. This is intended to be used as a pre-commit hook, see `.pre-commit-config.yaml`. You can run it manually with `pre-commit run pip-from-conda --all`. @@ -95,8 +95,7 @@ def conda_package_to_pip(package): def main(conda_fname, pip_fname): """ - Generate the pip dependencies file from the conda file, or compare that - they are synchronized (``compare=True``). + Generate the pip dependencies file from the conda file. Parameters ---------- @@ -104,10 +103,6 @@ def main(conda_fname, pip_fname): Path to the conda file with dependencies (e.g. `environment.yml`). pip_fname : str Path to the pip file with dependencies (e.g. `requirements-dev.txt`). - compare : bool, default False - Whether to generate the pip file (``False``) or to compare if the - pip file has been generated with this script and the last version - of the conda file (``True``). 
Returns ------- diff --git a/scripts/run_mypy.py b/scripts/run_mypy.py index f5a0b6d3a..842fb0a13 100755 --- a/scripts/run_mypy.py +++ b/scripts/run_mypy.py @@ -1,7 +1,8 @@ #!/usr/bin/env python """ -Invokes mypy and compare the reults with files in /pymc except tests -and a list of files that are known to fail. +Invoke mypy and compare the results with files in /pymc. + +Excludes tests and a list of files that are known to fail. Exit code 0 indicates that there are no unexpected results. @@ -97,7 +98,7 @@ def mypy_to_pandas(input_lines: Iterator[str]) -> pandas.DataFrame: def check_no_unexpected_results(mypy_lines: Iterator[str]): - """Compares mypy results with list of known FAILING files. + """Compare mypy results with list of known FAILING files. Exits the process with non-zero exit code upon unexpected results. """ diff --git a/setupegg.py b/setupegg.py index e179aafc0..c263f9584 100755 --- a/setupegg.py +++ b/setupegg.py @@ -13,9 +13,7 @@ # limitations under the License. #!/usr/bin/env python -""" -A setup.py script to use setuptools, which gives egg goodness, etc. -""" +"""A setup.py script to use setuptools, which gives egg goodness, etc.""" with open("setup.py") as s: exec(s.read()) diff --git a/tests/test_pytensorf.py b/tests/test_pytensorf.py index f0ae43559..b3564cac1 100644 --- a/tests/test_pytensorf.py +++ b/tests/test_pytensorf.py @@ -277,7 +277,7 @@ def test_convert_generator_data(input_dtype): result = convert_generator_data(square_generator) apply = result.owner op = apply.op - # Make sure the returned object is an PyTensor TensorVariable + # Make sure the returned object is a PyTensor TensorVariable assert isinstance(result, TensorVariable) assert isinstance(op, GeneratorOp), f"It's a {type(apply)}" # There are no inputs - because it generates...