Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
130 changes: 129 additions & 1 deletion skpro/distributions/base/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ class BaseDistribution(BaseObject):
"approx_spl": 1000, # sample size used in other MC estimates
"bisect_iter": 1000, # max iters for bisection method in ppf
# which methods are approximate (not numerically exact) should be listed here
"capabilities:approx": ["energy", "mean", "var", "pdfnorm"],
"capabilities:approx": ["energy", "mean", "var", "pdfnorm", "truncated_mean"],
# broadcasting and parameter settings
# -----------------------------------
# used to control broadcasting of parameters
Expand Down Expand Up @@ -1244,7 +1244,48 @@ def _energy_x(self, x):
-------
2D np.ndarray, same shape as ``self``
energy values w.r.t. the given points

Notes
-----
Uses an analytical shortcut when both ``"truncated_mean"`` and
``"cdf"`` appear in the subclass's ``capabilities:exact`` tag.
In that case, the energy is computed via:

.. math::

\mathbb{E}[|X - c|]
= (\mathbb{E}[X \mid X > c] - c)(1 - F(c))
+ (c - \mathbb{E}[X \mid X < c]) F(c)

When either capability is missing, falls back to Monte Carlo
via ``_energy_default``.
"""
exact_capas = self.get_tag("capabilities:exact", [])
has_exact_trunc = "truncated_mean" in exact_capas
has_exact_cdf = "cdf" in exact_capas

if has_exact_trunc and has_exact_cdf:
cdf_x = self._cdf(x)
x_float = np.asarray(x, dtype=float)
inf_arr = np.full_like(x_float, np.inf)
neg_inf_arr = np.full_like(x_float, -np.inf)

right_mean = self._truncated_mean(x_float, inf_arr)
left_mean = self._truncated_mean(neg_inf_arr, x_float)

right_mass = 1 - cdf_x
left_mass = cdf_x
zero_right = right_mass < 1e-15
zero_left = left_mass < 1e-15

right_term = np.where(zero_right, 0.0, (right_mean - x_float) * right_mass)
left_term = np.where(zero_left, 0.0, (x_float - left_mean) * left_mass)
energy_arr = right_term + left_term

if energy_arr.ndim > 0:
energy_arr = np.sum(energy_arr, axis=1)
return energy_arr

return self._energy_default(x)

def _energy_default(self, x=None):
Expand Down Expand Up @@ -1397,6 +1438,93 @@ def _mean(self):
spl = self.sample(approx_spl_size)
return self._sample_mean(spl)

def truncated_mean(self, lower=None, upper=None):
r"""Return expected value of the distribution truncated to [lower, upper].

Let :math:`X` be a random variable with the distribution of ``self``.
Returns :math:`\mathbb{E}[X \mid \text{lower} < X < \text{upper}]`.

If ``lower`` is None, it is treated as :math:`-\infty`.
If ``upper`` is None, it is treated as :math:`+\infty`.
If both are None, this is equivalent to ``mean()``.

Parameters
----------
lower : float or array-like or None, optional, default=None
lower truncation bound
upper : float or array-like or None, optional, default=None
upper truncation bound

Returns
-------
``pd.DataFrame`` with same rows, columns as ``self``
truncated expected value of distribution (entry-wise)
"""
if lower is None and upper is None:
return self.mean()
if lower is None:
lower = -np.inf
if upper is None:
upper = np.inf
return self._boilerplate("_truncated_mean", lower=lower, upper=upper)

def _truncated_mean(self, lower, upper):
r"""Return expected value of the distribution truncated to [lower, upper].

Private method, to be implemented by subclasses.

Parameters
----------
lower : 2D np.ndarray, same shape as ``self``
lower truncation bound (entry-wise)
upper : 2D np.ndarray, same shape as ``self``
upper truncation bound (entry-wise)

Returns
-------
2D np.ndarray, same shape as ``self``
truncated expected value of distribution (entry-wise)
"""
approx_spl_size = self.get_tag("approx_spl")

if self._has_implementation_of("_cdf") and self._has_implementation_of("_ppf"):
approx_method = (
"by approximating the truncated mean via ppf integration "
f"over the truncated CDF region, with {approx_spl_size} nodes"
)
warn(self._method_error_msg("truncated_mean", fill_in=approx_method))

cdf_l = self._cdf(lower)
cdf_u = self._cdf(upper)

ps = np.linspace(0, 1, approx_spl_size + 2)[1:-1]
qs = [self._ppf(cdf_l + p * (cdf_u - cdf_l)) for p in ps]
np3D = np.array(qs)
result = np.mean(np3D, axis=0)

zero_mass = np.abs(cdf_u - cdf_l) < 1e-15
if np.any(zero_mass):
result = np.where(zero_mass, np.nan, result)

return result

approx_method = (
"by approximating the truncated mean by the arithmetic mean of "
f"{approx_spl_size} samples filtered to the truncation bounds"
)
warn(self._method_error_msg("truncated_mean", fill_in=approx_method))

spl = self.sample(approx_spl_size)
spl_arr = spl.values if hasattr(spl, "values") else np.asarray(spl)
spl_arr = spl_arr.reshape(approx_spl_size, *lower.shape)

mask = (spl_arr >= lower) & (spl_arr <= upper)
masked = np.where(mask, spl_arr, np.nan)
with np.errstate(all="ignore"):
result = np.nanmean(masked, axis=0)

return result

def var(self):
r"""Return element/entry-wise variance of the distribution.

Expand Down
51 changes: 51 additions & 0 deletions skpro/distributions/exponential.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ class Exponential(_ScipyAdapter):
"log_pdf",
"cdf",
"energy",
"truncated_mean",
],
"distr:measuretype": "continuous",
"broadcast_init": "on",
Expand Down Expand Up @@ -92,6 +93,56 @@ def _energy_x(self, x):
energy_arr = energy_arr.sum(axis=1)
return energy_arr

def _truncated_mean(self, lower, upper):
r"""Return expected value of the distribution truncated to [lower, upper].

For :math:`X \sim \text{Exp}(\lambda)` with support :math:`[0, \infty)`:

.. math::

\mathbb{E}[X \mid a < X < b]
= \frac{(a + 1/\lambda) e^{-\lambda a}
- (b + 1/\lambda) e^{-\lambda b}}
{e^{-\lambda a} - e^{-\lambda b}}

where :math:`a = \max(\text{lower}, 0)`.

Parameters
----------
lower : 2D np.ndarray, same shape as ``self``
lower truncation bound
upper : 2D np.ndarray, same shape as ``self``
upper truncation bound

Returns
-------
2D np.ndarray, same shape as ``self``
truncated expected value of distribution (entry-wise)
"""
rate = self._bc_params["rate"]
inv_rate = 1.0 / rate

a = np.maximum(lower, 0.0)
b = np.asarray(upper, dtype=float)

b_safe = np.where(np.isfinite(b), b, 0.0)
exp_a = np.exp(-rate * a)
exp_b_raw = np.exp(-rate * b_safe)
is_b_finite = np.isfinite(b)

term_a = (a + inv_rate) * exp_a
term_b = np.where(is_b_finite, (b_safe + inv_rate) * exp_b_raw, 0.0)

numer = term_a - term_b
denom = exp_a - np.where(is_b_finite, exp_b_raw, 0.0)

safe_denom = np.where(np.abs(denom) < 1e-15, np.nan, denom)
result = numer / safe_denom

no_overlap = (a >= b) | ((upper <= 0) & np.isfinite(upper))
result = np.where(no_overlap, np.nan, result)
return result

@classmethod
def get_test_params(cls, parameter_set="default"):
"""Return testing parameter settings for the distribution."""
Expand Down
68 changes: 67 additions & 1 deletion skpro/distributions/laplace.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,16 @@ class Laplace(BaseDistribution):
# estimator tags
# --------------
"capabilities:approx": ["pdfnorm"],
"capabilities:exact": ["mean", "var", "energy", "pdf", "log_pdf", "cdf", "ppf"],
"capabilities:exact": [
"mean",
"var",
"energy",
"pdf",
"log_pdf",
"cdf",
"ppf",
"truncated_mean",
],
"distr:measuretype": "continuous",
"distr:paramtype": "parametric",
"broadcast_init": "on",
Expand Down Expand Up @@ -115,6 +124,63 @@ def _mean(self):
"""
return self._bc_params["mu"]

def _truncated_mean(self, lower, upper):
r"""Return expected value of the distribution truncated to [lower, upper].

Uses the partial expectation formula for Laplace:

.. math::

\mathbb{E}[X \cdot \mathbf{1}(X < c)] =
\begin{cases}
\frac{c - s}{2} e^{(c - \mu)/s} & c \le \mu \\
\mu - \frac{c + s}{2} e^{-{(c - \mu)/s}} & c > \mu
\end{cases}

where :math:`s` is the scale parameter.

Parameters
----------
lower : 2D np.ndarray, same shape as ``self``
lower truncation bound
upper : 2D np.ndarray, same shape as ``self``
upper truncation bound

Returns
-------
2D np.ndarray, same shape as ``self``
truncated expected value of distribution (entry-wise)
"""
mu = self._bc_params["mu"]
s = self._bc_params["scale"]

def _partial_expectation(c):
"""E[X * 1(X < c)] for Laplace(mu, s).

At c = -inf the result is 0, at c = +inf the result is mu.
"""
c = np.asarray(c, dtype=float)
is_neginf = np.isneginf(c)
is_posinf = np.isposinf(c)
c_safe = np.where(np.isfinite(c), c, mu)

below_mu = c_safe <= mu
e_below = 0.5 * (c_safe - s) * np.exp((c_safe - mu) / s)
e_above = mu - 0.5 * (c_safe + s) * np.exp(-(c_safe - mu) / s)
finite_result = np.where(below_mu, e_below, e_above)

return np.where(is_neginf, 0.0, np.where(is_posinf, mu, finite_result))

pe_upper = _partial_expectation(upper)
pe_lower = _partial_expectation(lower)

cdf_lower = self._cdf(lower)
cdf_upper = self._cdf(upper)
denom = cdf_upper - cdf_lower
safe_denom = np.where(np.abs(denom) < 1e-15, np.nan, denom)

return (pe_upper - pe_lower) / safe_denom

def _var(self):
r"""Return element/entry-wise variance of the distribution.

Expand Down
55 changes: 54 additions & 1 deletion skpro/distributions/normal.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,16 @@ class Normal(BaseDistribution):
# estimator tags
# --------------
"capabilities:approx": ["pdfnorm"],
"capabilities:exact": ["mean", "var", "energy", "pdf", "log_pdf", "cdf", "ppf"],
"capabilities:exact": [
"mean",
"var",
"energy",
"pdf",
"log_pdf",
"cdf",
"ppf",
"truncated_mean",
],
"distr:measuretype": "continuous",
"distr:paramtype": "parametric",
"broadcast_init": "on",
Expand Down Expand Up @@ -113,6 +122,50 @@ def _mean(self):
"""
return self._bc_params["mu"]

def _truncated_mean(self, lower, upper):
r"""Return expected value of the distribution truncated to [lower, upper].

For :math:`X \sim N(\mu, \sigma^2)`:

.. math::

\mathbb{E}[X \mid a < X < b]
= \mu + \sigma \frac{\phi(\alpha) - \phi(\beta)}
{\Phi(\beta) - \Phi(\alpha)}

where :math:`\alpha = (a - \mu)/\sigma`,
:math:`\beta = (b - \mu)/\sigma`,
:math:`\phi` is the standard normal pdf,
and :math:`\Phi` is the standard normal cdf.

Parameters
----------
lower : 2D np.ndarray, same shape as ``self``
lower truncation bound
upper : 2D np.ndarray, same shape as ``self``
upper truncation bound

Returns
-------
2D np.ndarray, same shape as ``self``
truncated expected value of distribution (entry-wise)
"""
mu = self._bc_params["mu"]
sigma = self._bc_params["sigma"]

alpha = (lower - mu) / sigma
beta = (upper - mu) / sigma

phi_alpha = np.exp(-0.5 * alpha**2) / np.sqrt(2 * np.pi)
phi_beta = np.exp(-0.5 * beta**2) / np.sqrt(2 * np.pi)
Phi_alpha = 0.5 * (1 + erf(alpha / np.sqrt(2)))
Phi_beta = 0.5 * (1 + erf(beta / np.sqrt(2)))

denom = Phi_beta - Phi_alpha
safe_denom = np.where(np.abs(denom) < 1e-15, np.nan, denom)

return mu + sigma * (phi_alpha - phi_beta) / safe_denom

def _var(self):
r"""Return element/entry-wise variance of the distribution.

Expand Down
Loading
Loading