Skip to content

Commit 5df554f

Browse files
committed
Fixing pre-commit errors.
1 parent 201742f commit 5df554f

File tree

3 files changed

+45
-39
lines changed

3 files changed

+45
-39
lines changed

src/dimensionality_reduction.py

Lines changed: 38 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,22 @@
11
import logging # noqa: D100
22
from abc import ABC, abstractmethod
3-
from typing import List
3+
from typing import Any, List, Tuple
44

55
import numpy as np
66
import torch
7+
from numpy.typing import NDArray
78
from sklearn.decomposition import PCA
89
from sklearn.manifold import TSNE
910

1011

1112
logger = logging.getLogger(__name__)
1213

1314

14-
def _global_min_max(x):
15-
return np.min(x), np.max(x)
15+
def _global_min_max(x: NDArray[Any]) -> Tuple[float, float]:
16+
return float(np.min(x)), float(np.max(x))
1617

1718

18-
def _normalize(x, min_value, max_value):
19+
def _normalize(x: NDArray[Any], min_value: float, max_value: float) -> NDArray[Any]:
1920
return 2 * (x - min_value) / (max_value - min_value) - 1.0
2021

2122

@@ -37,8 +38,8 @@ def __init__(
3738
self.random_seed = random_seed
3839
self.normalize_output = normalize_output
3940
# To be used for normalization.
40-
self.normalization_lower_bound = None
41-
self.normalization_upper_bound = None
41+
self.normalization_lower_bound = 0.0
42+
self.normalization_upper_bound = 0.0
4243

4344
# Set torch random seed for reproducibility.
4445
torch.manual_seed(random_seed)
@@ -212,40 +213,46 @@ def __init__(
212213
def fit_transform(self, embeddings: List[torch.Tensor]) -> List[torch.Tensor]:
213214
"""Apply the CutEmbeddings dimensionality reduction to the train data."""
214215
# Cut the embeddings to the desired size
215-
new_embeddings = [
216-
embedding[: self.output_dimension_size] for embedding in embeddings
217-
]
218-
new_embeddings = torch.stack(new_embeddings).numpy()
219-
if self.normalize_output:
220-
self.normalization_lower_bound, self.normalization_upper_bound = (
221-
_global_min_max(new_embeddings)
222-
)
223-
return torch.Tensor(
224-
_normalize(
225-
new_embeddings,
226-
min_value=self.normalization_lower_bound,
227-
max_value=self.normalization_upper_bound,
228-
)
229-
)
216+
cut_embeddings = [embedding[: self.output_dimension_size] for embedding in embeddings]
217+
218+
if not self.normalize_output:
219+
return cut_embeddings
220+
221+
# Convert to numpy for normalization
222+
np_embeddings = torch.stack(cut_embeddings).numpy()
223+
self.normalization_lower_bound, self.normalization_upper_bound = (
224+
_global_min_max(np_embeddings)
225+
)
226+
normalized = _normalize(
227+
np_embeddings,
228+
min_value=self.normalization_lower_bound,
229+
max_value=self.normalization_upper_bound,
230+
)
231+
return [torch.Tensor(x) for x in normalized]
230232

231233
def transform_new_points(
232234
self, new_embeddings: List[torch.Tensor]
233235
) -> List[torch.Tensor]:
234236
"""Apply the CutEmbeddings dimensionality reduction to the test data."""
235237
# Cut the new points to the desired size
236-
new_embeddings = [
238+
cut_embeddings = [
237239
embedding[: self.output_dimension_size] for embedding in new_embeddings
238240
]
239-
return [
240-
torch.Tensor(
241-
_normalize(
242-
embedding,
243-
min_value=self.normalization_lower_bound,
244-
max_value=self.normalization_upper_bound,
245-
)
241+
242+
if not self.normalize_output:
243+
return cut_embeddings
244+
245+
# Convert to numpy for normalization
246+
normalized_results = []
247+
for embedding in cut_embeddings:
248+
np_embedding = embedding.numpy()
249+
normalized = _normalize(
250+
np_embedding,
251+
min_value=self.normalization_lower_bound,
252+
max_value=self.normalization_upper_bound,
246253
)
247-
for embedding in new_embeddings
248-
]
254+
normalized_results.append(torch.Tensor(normalized))
255+
return normalized_results
249256

250257

251258
class Pca(DimensionalityReductionMethod):

src/lbo.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -389,7 +389,8 @@ def calculate_lbo_error(
389389
390390
Returns
391391
-------
392-
Tuple[float, float]: RMSE and average standard deviation of candidate capabilities.
392+
Tuple[float, float]: RMSE and average standard deviation of candidate
393+
capabilities.
393394
"""
394395
# Get the capability embeddings
395396
capabilities_encoding = torch.stack(
@@ -401,9 +402,7 @@ def calculate_lbo_error(
401402
[cap.scores[subject_llm_name]["mean"] for cap in capabilities]
402403
)
403404

404-
preds = lbo_model(capabilities_encoding)
405-
preds_mean = preds.mean
406-
preds_std = preds.variance.sqrt()
405+
preds_mean, preds_std = lbo_model.predict(capabilities_encoding)
407406
rmse = torch.sqrt(torch.mean((preds_mean - capability_scores) ** 2)).item()
408407
avg_std = torch.mean(preds_std).item()
409408

@@ -418,7 +417,7 @@ def select_capabilities_using_lbo(
418417
subject_llm_name: str,
419418
acquisition_function: str = "expected_variance_reduction",
420419
num_lbo_iterations: int | None = None,
421-
) -> Tuple[List[Capability], Dict[int, Any]]:
420+
) -> Tuple[List[Capability], Dict[str, List[float]]]:
422421
"""
423422
Select capabilities using the Latent Bayesian Optimization (LBO) method.
424423
@@ -458,7 +457,7 @@ def select_capabilities_using_lbo(
458457
acquisition_function=acquisition_function,
459458
)
460459

461-
error_dict = {"rmse": [], "avg_std": []}
460+
error_dict: Dict[str, List[float]] = {"rmse": [], "avg_std": []}
462461
# Get initial test error.
463462
rmse, avg_std = calculate_lbo_error(
464463
lbo_model=lbo,

tests/src/test_capability_embedding.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ def test_apply_dim_reduction_tsne(mock_capabilities):
6363

6464

6565
def test_apply_dim_reduction_pca(mock_capabilities):
66-
"""Test the apply_dimensionality_reduction function For the PCA method."""
66+
"""Test the apply_dimensionality_reduction function for the PCA method."""
6767
dimensionality_reduction_method = "pca"
6868
output_dimensions = 2
6969
embedding_model_name = "text-embedding-3-small"
@@ -96,7 +96,7 @@ def test_apply_dim_reduction_pca(mock_capabilities):
9696

9797

9898
def test_apply_dim_reduction_cut_embeddings(mock_capabilities):
99-
"""Test the apply_dimensionality_reduction function For the cut-embeddings method."""
99+
"""Test apply_dimensionality_reduction for the cut-embeddings method."""
100100
dimensionality_reduction_method = "cut-embeddings"
101101
output_dimensions = 2
102102
embedding_model_name = "text-embedding-3-small"

0 commit comments

Comments
 (0)