11import logging # noqa: D100
22from abc import ABC , abstractmethod
3- from typing import List
3+ from typing import Any , List , Tuple
44
55import numpy as np
66import torch
7+ from numpy .typing import NDArray
78from sklearn .decomposition import PCA
89from sklearn .manifold import TSNE
910
1011
# Module-level logger, namespaced to this module per the standard convention.
logger = logging.getLogger(__name__)
14- def _global_min_max (x ) :
15- return np .min (x ), np .max (x )
15+ def _global_min_max (x : NDArray [ Any ]) -> Tuple [ float , float ] :
16+ return float ( np .min (x )), float ( np .max (x ) )
1617
1718
18- def _normalize (x , min_value , max_value ) :
19+ def _normalize (x : NDArray [ Any ] , min_value : float , max_value : float ) -> NDArray [ Any ] :
1920 return 2 * (x - min_value ) / (max_value - min_value ) - 1.0
2021
2122
@@ -37,8 +38,8 @@ def __init__(
3738 self .random_seed = random_seed
3839 self .normalize_output = normalize_output
3940 # To be used for normalization.
40- self .normalization_lower_bound = None
41- self .normalization_upper_bound = None
41+ self .normalization_lower_bound = 0.0
42+ self .normalization_upper_bound = 0.0
4243
4344 # Set torch random seed for reproducibility.
4445 torch .manual_seed (random_seed )
@@ -212,40 +213,46 @@ def __init__(
212213 def fit_transform (self , embeddings : List [torch .Tensor ]) -> List [torch .Tensor ]:
213214 """Apply the CutEmbeddings dimensionality reduction to the train data."""
214215 # Cut the embeddings to the desired size
215- new_embeddings = [
216- embedding [: self .output_dimension_size ] for embedding in embeddings
217- ]
218- new_embeddings = torch .stack (new_embeddings ).numpy ()
219- if self .normalize_output :
220- self .normalization_lower_bound , self .normalization_upper_bound = (
221- _global_min_max (new_embeddings )
222- )
223- return torch .Tensor (
224- _normalize (
225- new_embeddings ,
226- min_value = self .normalization_lower_bound ,
227- max_value = self .normalization_upper_bound ,
228- )
229- )
216+ cut_embeddings = [embedding [: self .output_dimension_size ] for embedding in embeddings ]
217+
218+ if not self .normalize_output :
219+ return cut_embeddings
220+
221+ # Convert to numpy for normalization
222+ np_embeddings = torch .stack (cut_embeddings ).numpy ()
223+ self .normalization_lower_bound , self .normalization_upper_bound = (
224+ _global_min_max (np_embeddings )
225+ )
226+ normalized = _normalize (
227+ np_embeddings ,
228+ min_value = self .normalization_lower_bound ,
229+ max_value = self .normalization_upper_bound ,
230+ )
231+ return [torch .Tensor (x ) for x in normalized ]
230232
231233 def transform_new_points (
232234 self , new_embeddings : List [torch .Tensor ]
233235 ) -> List [torch .Tensor ]:
234236 """Apply the CutEmbeddings dimensionality reduction to the test data."""
235237 # Cut the new points to the desired size
236- new_embeddings = [
238+ cut_embeddings = [
237239 embedding [: self .output_dimension_size ] for embedding in new_embeddings
238240 ]
239- return [
240- torch .Tensor (
241- _normalize (
242- embedding ,
243- min_value = self .normalization_lower_bound ,
244- max_value = self .normalization_upper_bound ,
245- )
241+
242+ if not self .normalize_output :
243+ return cut_embeddings
244+
245+ # Convert to numpy for normalization
246+ normalized_results = []
247+ for embedding in cut_embeddings :
248+ np_embedding = embedding .numpy ()
249+ normalized = _normalize (
250+ np_embedding ,
251+ min_value = self .normalization_lower_bound ,
252+ max_value = self .normalization_upper_bound ,
246253 )
247- for embedding in new_embeddings
248- ]
254+ normalized_results . append ( torch . Tensor ( normalized ))
255+ return normalized_results
249256
250257
251258class Pca (DimensionalityReductionMethod ):
0 commit comments