diff --git a/conda-envs/environment-dev.yml b/conda-envs/environment-dev.yml
index b6a9c52b91..634fae9088 100644
--- a/conda-envs/environment-dev.yml
+++ b/conda-envs/environment-dev.yml
@@ -25,6 +25,7 @@ dependencies:
 - jupyter-sphinx
 - myst-nb<=1.0.0
 - numpydoc
+- preliz>=0.20.0
 - pre-commit>=2.8.0
 - polyagamma
 - pytest-cov>=2.5
diff --git a/conda-envs/environment-docs.yml b/conda-envs/environment-docs.yml
index 1fbddb548e..e5f5c23008 100644
--- a/conda-envs/environment-docs.yml
+++ b/conda-envs/environment-docs.yml
@@ -25,6 +25,7 @@ dependencies:
 - myst-nb<=1.0.0
 - numpydoc
 - polyagamma
+- preliz>=0.20.0
 - pre-commit>=2.8.0
 - pymc-sphinx-theme>=0.16
 - sphinx-copybutton
diff --git a/conda-envs/environment-test.yml b/conda-envs/environment-test.yml
index becb58339f..ac4a094c20 100644
--- a/conda-envs/environment-test.yml
+++ b/conda-envs/environment-test.yml
@@ -24,6 +24,7 @@ dependencies:
 - zarr>=2.5.0,<3
 # Extra dependencies for testing
 - ipython>=7.16
+- preliz>=0.20.0
 - pre-commit>=2.8.0
 - pytest-cov>=2.5
 - pytest>=3.0
diff --git a/conda-envs/windows-environment-dev.yml b/conda-envs/windows-environment-dev.yml
index 3dd404db7c..023d7194b9 100644
--- a/conda-envs/windows-environment-dev.yml
+++ b/conda-envs/windows-environment-dev.yml
@@ -25,6 +25,7 @@ dependencies:
 - myst-nb<=1.0.0
 - numpydoc
 - polyagamma
+- preliz>=0.20.0
 - pre-commit>=2.8.0
 - pytest-cov>=2.5
 - pytest>=3.0
diff --git a/conda-envs/windows-environment-test.yml b/conda-envs/windows-environment-test.yml
index f921088824..e2cd68abeb 100644
--- a/conda-envs/windows-environment-test.yml
+++ b/conda-envs/windows-environment-test.yml
@@ -25,6 +25,7 @@ dependencies:
 - zarr>=2.5.0,<3
 # Extra dependencies for testing
 - ipython>=7.16
+- preliz>=0.20.0
 - pre-commit>=2.8.0
 - pytest-cov>=2.5
 - pytest>=3.0
diff --git a/pymc/func_utils.py b/pymc/func_utils.py
index e7acb3dff0..5d85e1e7fc 100644
--- a/pymc/func_utils.py
+++ b/pymc/func_utils.py
@@ -11,15 +11,8 @@
 #   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 #   See the License for the specific language governing permissions and
 #   limitations under the License.
-import warnings
-
-from collections.abc import Callable
-
-import numpy as np
-import pytensor.tensor as pt
-from pytensor.gradient import NullTypeGradError
-from scipy import optimize
+import warnings
 
 import pymc as pm
 
@@ -30,24 +23,25 @@ def find_constrained_prior(
     distribution: pm.Distribution,
     lower: float,
     upper: float,
-    init_guess: dict[str, float],
+    init_guess: dict[str, float] | None = None,
     mass: float = 0.95,
     fixed_params: dict[str, float] | None = None,
+    fixed_stat: tuple[str, float] | None = None,
     mass_below_lower: float | None = None,
     **kwargs,
 ) -> dict[str, float]:
     """
-    Find optimal parameters to get `mass` % of probability of a distribution between `lower` and `upper`.
+    Find the maximum entropy distribution that satisfies the constraints.
+
+    The returned parameters place `mass` of the probability of `distribution`
+    in the interval defined by the `lower` and `upper` end-points.
 
-    Note: only works for one- and two-parameter distributions, as there
-    are exactly two constraints. Fix some combination of parameters
-    if you want to use it on >=3-parameter distributions.
+    Additional constraints can be set via `fixed_params` and `fixed_stat`.
 
     Parameters
     ----------
     distribution : Distribution
         PyMC distribution you want to set a prior on.
-        Needs to have a ``logcdf`` method implemented in PyMC.
     lower : float
         Lower bound to get `mass` % of probability of `pm_dist`.
     upper : float
@@ -65,11 +59,10 @@ def find_constrained_prior(
         Dictionary of fixed parameters, so that there are only 2 to optimize.
         For instance, for a StudentT, you fix nu to a constant and get the optimized
         mu and sigma.
-    mass_below_lower : float, optional, default None
-        The probability mass below the ``lower`` bound. If ``None``,
-        defaults to ``(1 - mass) / 2``, which implies that the probability
-        mass below the ``lower`` value will be equal to the probability
-        mass above the ``upper`` value.
+    fixed_stat : tuple, optional
+        Summary statistic to fix. The first element should be a name and the second a
+        numerical value. Valid names are: "mean", "mode", "median", "var", "std",
+        "skewness", "kurtosis". Defaults to None.
 
     Returns
     -------
@@ -78,19 +71,12 @@ def find_constrained_prior(
     opt_params : dict
         The optimized distribution parameters as a dictionary.
         Dictionary keys are the parameter names and
         dictionary values are the optimized parameter values.
 
-    Notes
-    -----
-    Optional keyword arguments can be passed to ``find_constrained_prior``. These will be
-    delivered to the underlying call to :external:py:func:`scipy.optimize.minimize`.
-
     Examples
     --------
     .. code-block:: python
 
         # get parameters obeying constraints
-        opt_params = pm.find_constrained_prior(
-            pm.Gamma, lower=0.1, upper=0.4, mass=0.75, init_guess={"alpha": 1, "beta": 10}
-        )
+        opt_params = pm.find_constrained_prior(pm.Gamma, lower=0.1, upper=0.4, mass=0.75)
 
         # use these parameters to draw random samples
         samples = pm.Gamma.dist(**opt_params, size=100).eval()
@@ -104,17 +90,9 @@ def find_constrained_prior(
             pm.StudentT,
             lower=0,
             upper=1,
-            init_guess={"mu": 5, "sigma": 2},
             fixed_params={"nu": 7},
         )
 
-    Under some circumstances, you might not want to have the same cumulative
-    probability below the ``lower`` threshold and above the ``upper`` threshold.
-    For example, you might want to constrain an Exponential distribution to
-    find the parameter that yields 90% of the mass below the ``upper`` bound,
-    and have zero mass below ``lower``. You can do that with the following call
-    to ``find_constrained_prior``
-
     .. code-block:: python
 
         opt_params = pm.find_constrained_prior(
@@ -122,83 +100,46 @@ def find_constrained_prior(
             lower=0,
             upper=3.0,
             mass=0.9,
-            init_guess={"lam": 1},
-            mass_below_lower=0,
         )
     """
-    warnings.warn(
-        "find_constrained_prior is deprecated and will be removed in a future version. "
-        "Please use maxent function from PreliZ. "
-        "https://preliz.readthedocs.io/en/latest/api_reference.html#preliz.unidimensional.maxent",
-        FutureWarning,
-        stacklevel=2,
-    )
-
-    assert 0.01 <= mass <= 0.99, (
-        "This function optimizes the mass of the given distribution +/- "
-        f"1%, so `mass` has to be between 0.01 and 0.99. You provided {mass}."
-    )
-    if mass_below_lower is None:
-        mass_below_lower = (1 - mass) / 2
-
-    # exit when any parameter is not scalar:
-    if np.any(np.asarray(distribution.rv_op.ndims_params) != 0):
-        raise NotImplementedError(
-            "`pm.find_constrained_prior` does not work with non-scalar parameters yet.\n"
-            "Feel free to open a pull request on PyMC repo if you really need this feature."
-        )
-
-    dist_params = pt.vector("dist_params")
-    params_to_optim = {
-        arg_name: dist_params[i] for arg_name, i in zip(init_guess.keys(), range(len(init_guess)))
-    }
-
-    if fixed_params is not None:
-        params_to_optim.update(fixed_params)
-
-    dist = distribution.dist(**params_to_optim)
-    try:
-        logcdf_lower = pm.logcdf(dist, pm.floatX(lower))
-        logcdf_upper = pm.logcdf(dist, pm.floatX(upper))
-    except AttributeError:
-        raise AttributeError(
-            f"You cannot use `find_constrained_prior` with {distribution} -- it doesn't have a logcdf "
-            "method yet.\nOpen an issue or, even better, a pull request on PyMC repo if you really "
-            "need it."
+    try:
+        from preliz import distributions
+        from preliz.unidimensional import maxent
+    except ImportError as e:
+        raise ImportError(
+            "The `find_constrained_prior` function requires the `preliz` package. "
+            "You can install it via `pip install preliz` or `conda install -c conda-forge preliz`."
+        ) from e
+
+    if fixed_params is None:
+        fixed_params = {}
+
+    if init_guess is not None:
+        warnings.warn(
+            "The `init_guess` argument is deprecated and will be removed in a future version. "
+            "The initial guess is determined automatically.",
+            FutureWarning,
+            stacklevel=2,
         )
 
-    target = (pt.exp(logcdf_lower) - mass_below_lower) ** 2
-    target_fn = pm.pytensorf.compile([dist_params], target, allow_input_downcast=True)
-
-    constraint = pt.exp(logcdf_upper) - pt.exp(logcdf_lower)
-    constraint_fn = pm.pytensorf.compile([dist_params], constraint, allow_input_downcast=True)
-
-    jac: str | Callable
-    constraint_jac: str | Callable
-    try:
-        pytensor_jac = pm.gradient(target, [dist_params])
-        jac = pm.pytensorf.compile([dist_params], pytensor_jac, allow_input_downcast=True)
-        pytensor_constraint_jac = pm.gradient(constraint, [dist_params])
-        constraint_jac = pm.pytensorf.compile(
-            [dist_params], pytensor_constraint_jac, allow_input_downcast=True
+    if mass_below_lower is not None:
+        warnings.warn(
+            "The `mass_below_lower` argument is deprecated and will be removed in a future version. "
+            "Use `fixed_stat`, e.g. ('mode', value), to add additional constraints.",
+            FutureWarning,
+            stacklevel=2,
         )
-    # when PyMC cannot compute the gradient
-    except (NotImplementedError, NullTypeGradError):
-        jac = "2-point"
-        constraint_jac = "2-point"
-
-    cons = optimize.NonlinearConstraint(constraint_fn, lb=mass, ub=mass, jac=constraint_jac)
-
-    opt = optimize.minimize(
-        target_fn, x0=list(init_guess.values()), jac=jac, constraints=cons, **kwargs
-    )
-    if not opt.success:
-        raise ValueError(
-            f"Optimization of parameters failed.\nOptimization termination details:\n{opt}"
+
+    if kwargs:
+        warnings.warn(
+            "Passing additional keyword arguments is deprecated and will be "
+            "removed in a future version.",
+            DeprecationWarning,
+            stacklevel=2,
         )
 
-    # save optimal parameters
-    opt_params = dict(zip(init_guess.keys(), opt.x))
-    if fixed_params is not None:
-        opt_params.update(fixed_params)
-    return opt_params
+    dist = getattr(distributions, distribution.__name__)
+
+    return maxent(
+        dist(**fixed_params), lower=lower, upper=upper, mass=mass, fixed_stat=fixed_stat, plot=False
+    ).params_dict
diff --git a/requirements-dev.txt b/requirements-dev.txt
index 8b59683594..643764f1d9 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -15,6 +15,7 @@ numpydoc
 pandas>=0.24.0
 polyagamma
 pre-commit>=2.8.0
+preliz>=0.20.0
 pymc-sphinx-theme>=0.16.0
 pytensor>=2.32.0,<2.33
 pytest-cov>=2.5
diff --git a/tests/test_func_utils.py b/tests/test_func_utils.py
index e0815c86c7..fe2a378b99 100644
--- a/tests/test_func_utils.py
+++ b/tests/test_func_utils.py
@@ -12,111 +12,32 @@
 #   See the License for the specific language governing permissions and
 #   limitations under the License.
 
-import numpy as np
 import pytest
 
 import pymc as pm
 
 
 @pytest.mark.parametrize(
-    "distribution, lower, upper, init_guess, fixed_params, mass_below_lower",
+    "distribution, lower, upper, mass, fixed_params, fixed_stats",
     [
-        (pm.Gamma, 0.1, 0.4, {"alpha": 1, "beta": 10}, {}, None),
-        (pm.Normal, 155, 180, {"mu": 170, "sigma": 3}, {}, None),
-        (pm.StudentT, 0.1, 0.4, {"mu": 10, "sigma": 3}, {"nu": 7}, None),
-        (pm.StudentT, 0, 1, {"mu": 5, "sigma": 2, "nu": 7}, {}, None),
-        (pm.Exponential, 0, 1, {"lam": 1}, {}, 0),
-        (pm.HalfNormal, 0, 1, {"sigma": 1}, {}, 0),
-        (pm.Binomial, 0, 8, {"p": 0.5}, {"n": 10}, None),
-        (pm.Poisson, 1, 15, {"mu": 10}, {}, None),
-        (pm.Poisson, 19, 41, {"mu": 30}, {}, None),
+        (pm.Gamma, 0.1, 0.4, None, {}, None),
+        (pm.StudentT, 0.1, 0.4, 0.7, {"nu": 7}, None),
+        (pm.Beta, 0, 0.7, 0.9, {}, ("mode", 0.25)),
     ],
 )
-@pytest.mark.parametrize("mass", [0.5, 0.75, 0.95])
 def test_find_constrained_prior(
-    distribution, lower, upper, init_guess, fixed_params, mass, mass_below_lower
+    distribution,
+    lower,
+    upper,
+    mass,
+    fixed_params,
+    fixed_stats,
 ):
-    opt_params = pm.find_constrained_prior(
+    pm.find_constrained_prior(
         distribution,
         lower=lower,
         upper=upper,
         mass=mass,
-        init_guess=init_guess,
         fixed_params=fixed_params,
-        mass_below_lower=mass_below_lower,
+        fixed_stat=fixed_stats,
     )
-
-    opt_distribution = distribution.dist(**opt_params)
-    mass_in_interval = (
-        pm.math.exp(pm.logcdf(opt_distribution, upper))
-        - pm.math.exp(pm.logcdf(opt_distribution, lower))
-    ).eval()
-    assert np.abs(mass_in_interval - mass) <= 1e-5
-
-
-@pytest.mark.parametrize(
-    "distribution, lower, upper, init_guess, fixed_params",
-    [
-        (pm.Gamma, 0.1, 0.4, {"alpha": 1}, {"beta": 10}),
-        (pm.Exponential, 0.1, 1, {"lam": 1}, {}),
-        (pm.Binomial, 0, 2, {"p": 0.8}, {"n": 10}),
-    ],
-)
-def test_find_constrained_prior_error_too_large(
-    distribution, lower, upper, init_guess, fixed_params
-):
-    with pytest.raises(
-        ValueError, match="Optimization of parameters failed.\nOptimization termination details:\n"
-    ):
-        pm.find_constrained_prior(
-            distribution,
-            lower=lower,
-            upper=upper,
-            mass=0.95,
-            init_guess=init_guess,
-            fixed_params=fixed_params,
-        )
-
-
-def test_find_constrained_prior_input_errors():
-    # missing param
-    with pytest.raises(TypeError, match="required positional argument"):
-        pm.find_constrained_prior(
-            pm.StudentT,
-            lower=0.1,
-            upper=0.4,
-            mass=0.95,
-            init_guess={"mu": 170, "sigma": 3},
-        )
-
-    # mass too high
-    with pytest.raises(AssertionError, match="has to be between 0.01 and 0.99"):
-        pm.find_constrained_prior(
-            pm.StudentT,
-            lower=0.1,
-            upper=0.4,
-            mass=0.995,
-            init_guess={"mu": 170, "sigma": 3},
-            fixed_params={"nu": 7},
-        )
-
-    # mass too low
-    with pytest.raises(AssertionError, match="has to be between 0.01 and 0.99"):
-        pm.find_constrained_prior(
-            pm.StudentT,
-            lower=0.1,
-            upper=0.4,
-            mass=0.005,
-            init_guess={"mu": 170, "sigma": 3},
-            fixed_params={"nu": 7},
-        )
-
-    # non-scalar params
-    with pytest.raises(NotImplementedError, match="does not work with non-scalar parameters yet"):
-        pm.find_constrained_prior(
-            pm.MvNormal,
-            lower=0,
-            upper=1,
-            mass=0.95,
-            init_guess={"mu": 5, "cov": np.asarray([[1, 0.2], [0.2, 1]])},
-        )
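A quick usage sketch of the reworked API for reviewers. It only uses what this patch defines (`fixed_params`, `fixed_stat`, and the PreliZ-backed optimization); the bounds and the Beta mode constraint mirror the new test cases, and it assumes, as the wrapper itself does, that the PreliZ distribution of the same name accepts the same parameter names as the PyMC one.

.. code-block:: python

    import pymc as pm

    # Default behaviour: 95% of the probability mass between the bounds.
    gamma_params = pm.find_constrained_prior(pm.Gamma, lower=0.1, upper=0.4)

    # Fix a parameter that should not be optimized, here nu of a StudentT.
    studentt_params = pm.find_constrained_prior(
        pm.StudentT, lower=0.1, upper=0.4, mass=0.7, fixed_params={"nu": 7}
    )

    # Fix a summary statistic instead of a parameter: a Beta prior whose mode
    # is 0.25, with 90% of its mass between 0 and 0.7.
    beta_params = pm.find_constrained_prior(
        pm.Beta, lower=0, upper=0.7, mass=0.9, fixed_stat=("mode", 0.25)
    )

    # The returned dict maps parameter names to optimized values and can be
    # splatted straight into the corresponding PyMC distribution.
    with pm.Model():
        x = pm.Beta("x", **beta_params)

Because the wrapper resolves the PreliZ distribution by class name (`getattr(distributions, distribution.__name__)`), it only supports PyMC distributions that PreliZ implements under the same name; anything else raises an `AttributeError` from the `getattr` lookup.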
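The three deprecation branches keep old call signatures working while steering users toward the new arguments. A minimal check of that behaviour, assuming this patch is applied and `preliz>=0.20.0` is installed:

.. code-block:: python

    import warnings

    import pymc as pm

    # Old-style call: `init_guess` is still accepted but now ignored.
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        pm.find_constrained_prior(
            pm.Gamma, lower=0.1, upper=0.4, mass=0.75, init_guess={"alpha": 1, "beta": 10}
        )

    # Expect a FutureWarning saying that `init_guess` is deprecated.
    print([str(w.message) for w in caught])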