meta-pytorch
diff --git a/‎botorch/utils/probability/bvn.py‎
Lines changed: 24 additions & 22 deletions b/‎botorch/utils/probability/bvn.py‎
Lines changed: 24 additions & 22 deletions
diff --git a/‎botorch/utils/probability/lin_ess.py‎
Lines changed: 11 additions & 7 deletions b/‎botorch/utils/probability/lin_ess.py‎
Lines changed: 11 additions & 7 deletions
diff --git a/‎botorch/utils/probability/linalg.py‎
Lines changed: 7 additions & 9 deletions b/‎botorch/utils/probability/linalg.py‎
Lines changed: 7 additions & 9 deletions
diff --git a/‎botorch/utils/probability/truncated_multivariate_normal.py‎
Lines changed: 1 addition & 1 deletion b/‎botorch/utils/probability/truncated_multivariate_normal.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎botorch/utils/probability/unified_skew_normal.py‎
Lines changed: 24 additions & 14 deletions b/‎botorch/utils/probability/unified_skew_normal.py‎
Lines changed: 24 additions & 14 deletions
diff --git a/‎sphinx/source/acquisition.rst‎
Lines changed: 6 additions & 6 deletions b/‎sphinx/source/acquisition.rst‎
Lines changed: 6 additions & 6 deletions
diff --git a/‎sphinx/source/utils.rst‎
Lines changed: 48 additions & 0 deletions b/‎sphinx/source/utils.rst‎
Lines changed: 48 additions & 0 deletions
@@ -92,16 +92,16 @@ def bvn(r: Tensor, xl: Tensor, yl: Tensor, xu: Tensor, yu: Tensor) -> Tensor:
 def bvnu(r: Tensor, h: Tensor, k: Tensor) -> Tensor:
     r"""Solves for `P(x > h, y > k)` where `x` and `y` are standard bivariate normal
     random variables with correlation coefficient `r`. In [Genz2004bvnt]_, this is (1)
-    ```
-    L(h, k, r) = P(x < -h, y < -k)
-               = 1/(a 2\pi) \int_{h}^{\infty} \int_{k}^{\infty} f(x, y, r) dy dx,
-    ```
+
+        `L(h, k, r) = P(x < -h, y < -k) \
+        = 1/(a 2\pi) \int_{h}^{\infty} \int_{k}^{\infty} f(x, y, r) dy dx,`
+
     where `f(x, y, r) = e^{-1/(2a^2) (x^2 - 2rxy + y^2)}` and `a = (1 - r^2)^{1/2}`.
 
     [Genz2004bvnt]_ report the following integration scheme incurs a maximum of 5e-16
-    error when run in double precision: if |r| >= 0.925, use a 20-point quadrature rule
-    on a 5th order Taylor expansion; else, numerically integrate in polar coordinates
-    using no more than 20 quadrature points.
+    error when run in double precision: if `|r| >= 0.925`, use a 20-point quadrature
+    rule on a 5th order Taylor expansion; else, numerically integrate in polar
+    coordinates using no more than 20 quadrature points.
 
     Args:
         r: Tensor of correlation coefficients.
@@ -137,10 +137,10 @@ def _bvnu_polar(
     r: Tensor, h: Tensor, k: Tensor, num_points: Optional[int] = None
 ) -> Tensor:
     r"""Solves for `P(x > h, y > k)` by integrating in polar coordinates as
-    ```
-        L(h, k, r) = \Phi(-h)\Phi(-k) + 1/(2\pi) \int_{0}^{sin^{-1}(r)} f(t) dt
-        f(t) = e^{-0.5 cos(t)^{-2} (h^2 + k^2 - 2hk sin(t))}
-    ```
+
+        `L(h, k, r) = \Phi(-h)\Phi(-k) + 1/(2\pi) \int_{0}^{sin^{-1}(r)} f(t) dt \
+        f(t) = e^{-0.5 cos(t)^{-2} (h^2 + k^2 - 2hk sin(t))}`
+
     For details, see Section 2.2 of [Genz2004bvnt]_.
     """
     if num_points is None:
@@ -168,12 +168,13 @@ def _bvnu_taylor(r: Tensor, h: Tensor, k: Tensor, num_points: int = 20) -> Tenso
     r"""Solves for `P(x > h, y > k)` via Taylor expansion.
 
     Per Section 2.3 of [Genz2004bvnt]_, the bvnu equation (1) may be rewritten as
-    ```
-        L(h, k, r) = L(h, k, s) - s/(2\pi) \int_{0}^{a} f(x) dx
-        f(x) = (1 - x^2){-1/2} e^{-0.5 ((h - sk)/ x)^2} e^{-shk/(1 + (1 - x^2)^{1/2})},
-    ```
+
+        `L(h, k, r) = L(h, k, s) - s/(2\pi) \int_{0}^{a} f(x) dx \
+        f(x) = (1 - x^2)^{-1/2} e^{-0.5 ((h - sk)/ x)^2} e^{-shk/(1 + (1 - x^2)^{1/2})},`
+
     where `s = sign(r)` and `a = sqrt(1 - r^{2})`. The term `L(h, k, s)` is analytic.
-    The second integral is approximated via Taylor expansion.
+    The second integral is approximated via Taylor expansion. See Sections 2.3 and
+    2.4 of [Genz2004bvnt]_.
     """
     _0, _1, _ni2, _i2pi, _sq2pi = get_constants_like(
         values=(0, 1, -0.5, _inv_2pi, _sqrt_2pi), ref=r
@@ -246,13 +247,13 @@ def bvnmom(
     r"""Computes the expected values of truncated, bivariate normal random variables.
 
     Let `x` and `y` be a pair of standard bivariate normal random variables having
-    correlation `r`. This function computes `E([x,y] | [xl,yl] < [x,y] < [xu,yu])`.
+    correlation `r`. This function computes `E([x,y] \| [xl,yl] < [x,y] < [xu,yu])`.
 
     Following [Muthen1990moments]_ equations (4) and (5), we have
-    ```
-    E(x | [xl, yl] < [x, y] < [xu, yu])
-        = Z^{-1} \phi(xl) P(yl < y < yu | x=xl) - \phi(xu) P(yl < y < yu | x=xu)
-    ```
+
+        `E(x \| [xl, yl] < [x, y] < [xu, yu]) \
+        = Z^{-1} \phi(xl) P(yl < y < yu \| x=xl) - \phi(xu) P(yl < y < yu \| x=xu),`
+
     where `Z = P([xl, yl] < [x, y] < [xu, yu])` and `\phi` is the standard normal PDF.
 
     Args:
@@ -264,7 +265,8 @@ def bvnmom(
         p: Tensor of probabilities `P(xl < x < xu, yl < y < yu)`, same shape as `r`.
 
     Returns:
-        `E(x | [xl, yl] < [x, y] < [xu, yu])` and `E(y | [xl, yl] < [x, y] < [xu, yu])`.
+        `E(x \| [xl, yl] < [x, y] < [xu, yu])` and
+        `E(y \| [xl, yl] < [x, y] < [xu, yu])`.
     """
     if not (r.shape == xl.shape == xu.shape == yl.shape == yu.shape):
         raise UnsupportedError("Arguments to `bvn` must have the same shape.")
 
@@ -94,13 +94,13 @@ def __init__(
             try:
                 covariance_root = torch.linalg.cholesky(covariance_matrix)
             except RuntimeError as e:
-                if "positive-definite" in str(e):
-                    raise ValueError(
+                raise_e = e
+                if "positive-definite" in str(raise_e):
+                    raise_e = ValueError(
                         "Covariance matrix is not positive definite. "
                         "Currently only non-degenerate distributions are supported."
                     )
-                else:
-                    raise e
+                raise raise_e
         self._covariance_root = covariance_root
         self._x = self.x0.clone()  # state of the sampler ("current point")
         # We will need the following repeatedly, let's allocate them once
@@ -216,11 +216,12 @@ def _find_active_intersections(self, nu: Tensor) -> Tensor:
             nu=nu, theta=theta, delta_theta=_delta_theta
         )
         theta_active = theta[active_directions.nonzero()]
-
+        delta_theta = _delta_theta
         while theta_active.numel() % 2 == 1:
             # Almost tangential ellipses, reduce delta_theta
+            delta_theta /= 10
             active_directions = self._index_active(
-                theta=theta, nu=nu, delta_theta=0.1 * _delta_theta
+                theta=theta, nu=nu, delta_theta=delta_theta
             )
             theta_active = theta[active_directions.nonzero()]
 
@@ -236,6 +237,9 @@ def _find_intersection_angles(self, nu: Tensor) -> Tensor:
         """Compute all of the up to 2*n_ineq_con intersections of the ellipse
         and the linear constraints.
 
+        For background, see equation (2) in
+        http://proceedings.mlr.press/v108/gessner20a/gessner20a.pdf
+
         Args:
             nu: A `d x 1`-dim tensor (the "new" direction, drawn from N(0, I)).
 
@@ -264,7 +268,7 @@ def _find_intersection_angles(self, nu: Tensor) -> Tensor:
         return torch.sort(theta).values
 
     def _index_active(
-        self, nu: Tensor, theta: Tensor, delta_theta: float = 1e-4
+        self, nu: Tensor, theta: Tensor, delta_theta: float = _delta_theta
     ) -> Tensor:
         r"""Determine active indices.
 
 
@@ -50,9 +50,8 @@ def augment_cholesky(
         raise ValueError("One and only one of `Kba` or `Lba` must be provided.")
 
     if jitter is not None:
-        diag = Kbb.diagonal(dim1=-2, dim2=-1)
         Kbb = Kbb.clone()
-        Kbb.fill_diagonal_(diag + jitter)
+        Kbb.diagonal(dim1=-2, dim2=-1).add_(jitter)
 
     if Lba is None:
         Lba = torch.linalg.solve_triangular(
@@ -62,7 +61,7 @@ def augment_cholesky(
     Lbb, info = torch.linalg.cholesky_ex(Kbb - Lba @ Lba.transpose(-2, -1))
     if info.any():
         raise NotPSDError(
-            "Schur complement of `K` with respect to `Kaa` not PSD for the given"
+            "Schur complement of `K` with respect to `Kaa` not PSD for the given "
             "Cholesky factor `Laa`"
             f"{'.' if jitter is None else f' and nugget jitter={jitter}.'}"
         )
@@ -85,19 +84,19 @@ def __post_init__(self, validate_init: bool = True):
 
         if self.tril.shape[-2] != self.tril.shape[-1]:
             raise ValueError(
-                f"Expected square matrices but `matrix` has shape {self.tril.shape}."
+                f"Expected square matrices but `matrix` has shape `{self.tril.shape}`."
             )
 
         if self.perm.shape != self.tril.shape[:-1]:
             raise ValueError(
                 f"`perm` of shape `{self.perm.shape}` incompatible with "
-                f"`matrix` of shape `{self.tril.shape}."
+                f"`matrix` of shape `{self.tril.shape}`."
             )
 
         if self.diag is not None and self.diag.shape != self.tril.shape[:-1]:
             raise ValueError(
                 f"`diag` of shape `{self.diag.shape}` incompatible with "
-                f"`matrix` of shape `{self.tril.shape}."
+                f"`matrix` of shape `{self.tril.shape}`."
             )
 
     def __getitem__(self, key: Any) -> PivotedCholesky:
@@ -135,9 +134,8 @@ def pivot_(self, pivot: LongTensor) -> None:
         # Perform basic swaps
         for key in ("perm", "diag"):
             tnsr = getattr(self, key, None)
-            if tnsr is None:
-                continue
-            swap_along_dim_(tnsr, i=self.step, j=pivot, dim=pivot.ndim)
+            if tnsr is not None:
+                swap_along_dim_(tnsr, i=self.step, j=pivot, dim=tnsr.ndim - 1)
 
         # Perform matrix swaps; preallocate buffers for row/column linear indices
         size2 = size**2
 
@@ -145,4 +145,4 @@ def expand(
         return new
 
     def __repr__(self) -> str:
-        return super().__repr__()[:-1] + f"bounds: {self.bounds.shape})"
+        return super().__repr__()[:-1] + f", bounds: {self.bounds.shape})"
@@ -7,14 +7,16 @@
 from __future__ import annotations
 
 from inspect import getmembers
-from typing import Optional, Sequence
+from typing import Optional, Sequence, Union
 
 import torch
 from botorch.utils.probability.linalg import augment_cholesky, block_matrix_concat
 from botorch.utils.probability.mvnxpb import MVNXPB
 from botorch.utils.probability.truncated_multivariate_normal import (
     TruncatedMultivariateNormal,
 )
+from linear_operator.operators import LinearOperator
+from linear_operator.utils.errors import NotPSDError
 from torch import Tensor
 from torch.distributions.multivariate_normal import Distribution, MultivariateNormal
 from torch.distributions.utils import lazy_property
@@ -28,7 +30,7 @@ def __init__(
         self,
         trunc: TruncatedMultivariateNormal,
         gauss: MultivariateNormal,
-        cross_covariance_matrix: Tensor,
+        cross_covariance_matrix: Union[Tensor, LinearOperator],
         validate_args: Optional[bool] = None,
     ):
         r"""Unified Skew Normal distribution of `Y | a < X < b` for jointly Gaussian
@@ -52,7 +54,10 @@ def __init__(
                 f"{len(trunc.event_shape)}-dimensional `trunc` incompatible with"
                 f"{len(gauss.event_shape)}-dimensional `gauss`."
             )
-
+        # LinearOperator currently doesn't support torch.linalg.solve_triangular,
+        # so for the time being, we cast the operator to dense here
+        if isinstance(cross_covariance_matrix, LinearOperator):
+            cross_covariance_matrix = cross_covariance_matrix.to_dense()
         try:
             batch_shape = torch.broadcast_shapes(trunc.batch_shape, gauss.batch_shape)
         except RuntimeError as e:
@@ -66,13 +71,21 @@ def __init__(
         self.trunc = trunc
         self.gauss = gauss
         self.cross_covariance_matrix = cross_covariance_matrix
-        if validate_args:
+        if self._validate_args:
             try:
+                # calling _orthogonalized_gauss first makes the following call
+                # _orthogonalized_gauss.scale_tril which is used by self.rsample
                 self._orthogonalized_gauss
                 self.scale_tril
-            except RuntimeError as e:
-                if "positive-definite" in str(e):
-                    raise ValueError(
+            except Exception as e:
+                # error could be thrown by linalg.augment_cholesky (NotPSDError)
+                # or torch.linalg.cholesky (with "positive-definite" in the message)
+                if (
+                    isinstance(e, NotPSDError)
+                    or "positive-definite" in str(e)
+                    or "PositiveDefinite" in str(e)
+                ):
+                    e = ValueError(
                         "UnifiedSkewNormal is only well-defined for positive definite"
                         " joint covariance matrices."
                     )
@@ -158,7 +171,10 @@ def expand(
             elif isinstance(obj, Distribution):
                 new_obj = obj.expand(batch_shape=batch_shape)
             else:
-                raise TypeError
+                raise TypeError(
+                    f"Type {type(obj)} of UnifiedSkewNormal's lazy property "
+                    f"{name} not supported."
+                )
 
             setattr(new, name, new_obj)
         return new
@@ -203,12 +219,6 @@ def _orthogonalized_gauss(self) -> MultivariateNormal:
             parameters["covariance_matrix"] = (
                 self.gauss.covariance_matrix - beta.transpose(-1, -2) @ beta
             )
-            return MultivariateNormal(
-                loc=torch.zeros_like(self.gauss.loc),
-                scale_tril=self.scale_tril[..., -n:, -n:],
-                validate_args=self._validate_args,
-            )
-
         return MultivariateNormal(**parameters, validate_args=self._validate_args)
 
     @lazy_property
 
@@ -141,32 +141,32 @@ Utilities
 -------------------------------------------
 
 Fixed Feature Acquisition Function
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 .. automodule:: botorch.acquisition.fixed_feature
     :members:
 
 Constructors for Acquisition Function Input Arguments
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 .. automodule:: botorch.acquisition.input_constructors
     :members:
 
 Penalized Acquisition Function Wrapper
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 .. automodule:: botorch.acquisition.penalized
     :members:
 
 Proximal Acquisition Function Wrapper
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 .. automodule:: botorch.acquisition.proximal
     :members:
 
 General Utilities for Acquisition Functions
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 .. automodule:: botorch.acquisition.utils
     :members:
 
 
 Multi-Objective Utilities for Acquisition Functions
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 .. automodule:: botorch.acquisition.multi_objective.utils
     :members:
@@ -72,6 +72,16 @@ Feasible Volume
 .. automodule:: botorch.utils.feasible_volume
 		:members:
 
+Constants
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.. automodule:: botorch.utils.constants
+		:members:
+
+Safe Math
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.. automodule:: botorch.utils.safe_math
+		:members:
+
 Multi-Objective Utilities
 -------------------------------------------
 
@@ -114,3 +124,41 @@ Scalarization
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 .. automodule:: botorch.utils.multi_objective.scalarization
 		:members:
+
+Probability Utilities
+-------------------------------------------
+
+Multivariate Gaussian Probabilities via Bivariate Conditioning
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.. automodule:: botorch.utils.probability.mvnxpb
+    	:members:
+
+Truncated Multivariate Normal Distribution
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.. automodule:: botorch.utils.probability.truncated_multivariate_normal
+    	:members:
+
+Unified Skew Normal Distribution
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.. automodule:: botorch.utils.probability.unified_skew_normal
+    	:members:
+
+Bivariate Normal Probabilities and Statistics
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.. automodule:: botorch.utils.probability.bvn
+    	:members:
+
+Elliptic Slice Sampler with Linear Constraints
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.. automodule:: botorch.utils.probability.lin_ess
+    	:members:
+
+Linear Algebra Helpers
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.. automodule:: botorch.utils.probability.linalg
+    	:members:
+
+Probability Helpers
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.. automodule:: botorch.utils.probability.utils
+    	:members: