Merge pull request #12 from amreis/improve-stability

rudolfwilliam · web-flow · commit edd19b7cfe38 · 2025-11-05T21:05:33.000+01:00
Improve stability and composability
diff --git a/torchkde/modules.py b/torchkde/modules.py
@@ -5,10 +5,10 @@
 from .utils import ensure_two_dimensional, check_if_mat
 from .algorithms import RootTree, SUPPORTED_ALGORITHMS
 from .bandwidths import SUPPORTED_BANDWIDTHS, compute_bandwidth
-from .kernels import (GaussianKernel, 
-                      EpanechnikovKernel, 
-                      ExponentialKernel, 
-                      TopHatKernel, 
+from .kernels import (GaussianKernel,
+                      EpanechnikovKernel,
+                      ExponentialKernel,
+                      TopHatKernel,
                       VonMisesFisherKernel,
                       SUPPORTED_KERNELS)
 
@@ -28,7 +28,7 @@
 
 
 class KernelDensity(nn.Module):
-    """Analag to the KernelDensity class in sklearn.neighbors 
+    """Analog of the KernelDensity class in sklearn.neighbors
     (see https://github.com/scikit-learn/scikit-learn/blob/main/sklearn/neighbors/_kde.py)."""
 
     def __init__(
@@ -37,7 +37,8 @@ def __init__(
         bandwidth: Union[float, str] = 1.0,
         algorithm: str = "standard",
         kernel: str = "gaussian",
-        kernel_kwargs: dict = None
+        kernel_kwargs: dict = None,
+        eps: float = 0.0,
     ) -> None:
         """Initialize the KernelDensity estimator.
 
@@ -51,7 +52,10 @@ def __init__(
             The kernel to use for density estimation.
         kernel_kwargs : dict, optional
             Additional keyword arguments for the kernel.
+        eps : float, optional
+            Lower bound for densities before the log; use eps > 0 to avoid -inf (the default 0.0 does not).
         """
+        super().__init__()
         if not isinstance(bandwidth, str):
             assert bandwidth > 0, "Bandwidth must be positive."
             self.bandwidth = bandwidth**2 # square the bandwidth to match sklearn's implementation
@@ -69,19 +73,20 @@ def __init__(
         self.device = None
         self.n_features = None
         self.data = None
+        self.eps = eps
 
         if algorithm not in SUPPORTED_ALGORITHMS:
             raise ValueError(f"Algorithm {algorithm} not supported")
-        
+
         if kernel not in SUPPORTED_KERNELS:
             raise ValueError(f"Kernel {kernel} not supported")
 
         if not isinstance(bandwidth, (float, torch.Tensor)) and bandwidth not in SUPPORTED_BANDWIDTHS:
             raise ValueError(f"Bandwidth {bandwidth} not supported")
 
 
-    def fit(self, 
-            X: torch.Tensor, 
+    def fit(self,
+            X: torch.Tensor,
             sample_weight: Optional[torch.Tensor] = None
             ) -> 'KernelDensity':
         """Fit the Kernel Density model on the data.
@@ -141,7 +146,7 @@ def score_samples(self, X: torch.Tensor, batch_size: int = 128) -> torch.Tensor:
         """
         assert self.is_fitted, "Model must be fitted before scoring samples."
         assert X.device == self.device, "Device of the query data must be on the same device as the data for fitting the estimator."
-        
+
         n_samples = X.shape[0]
         # Compute log-density estimation with a kernel function
         log_density = []
@@ -156,7 +161,7 @@ def score_samples(self, X: torch.Tensor, batch_size: int = 128) -> torch.Tensor:
             density = ((self.sample_weight * kernel_values).sum(-1) * self.kernel_module.norm_constant) \
                         / self.sample_weight.sum()
             # Compute the log-density
-            log_density.append(density.log())
+            log_density.append(density.clamp(min=self.eps).log())
 
         # Convert the list of log-density values into a tensor
         log_density = torch.cat(log_density, dim=0)