11import logging # noqa: D100
22from abc import ABC , abstractmethod
3- from typing import List
3+ from typing import Any , List , Tuple
44
55import numpy as np
66import torch
7+ from numpy .typing import NDArray
78from sklearn .decomposition import PCA
89from sklearn .manifold import TSNE
910
1011
# Module-level logger, namespaced to this module per the standard convention.
logger = logging.getLogger(__name__)
14- def _global_min_max (x ) :
15- return np .min (x ), np .max (x )
15+ def _global_min_max (x : NDArray [ Any ]) -> Tuple [ float , float ] :
16+ return float ( np .min (x )), float ( np .max (x ) )
1617
1718
18- def _normalize (x , min_value , max_value ) :
19+ def _normalize (x : NDArray [ Any ] , min_value : float , max_value : float ) -> NDArray [ Any ] :
1920 return 2 * (x - min_value ) / (max_value - min_value ) - 1.0
2021
2122
@@ -37,8 +38,8 @@ def __init__(
3738 self .random_seed = random_seed
3839 self .normalize_output = normalize_output
3940 # To be used for normalization.
40- self .normalization_lower_bound = None
41- self .normalization_upper_bound = None
41+ self .normalization_lower_bound = 0.0
42+ self .normalization_upper_bound = 0.0
4243
4344 # Set torch random seed for reproducibility.
4445 torch .manual_seed (random_seed )
@@ -212,40 +213,46 @@ def __init__(
212213 def fit_transform (self , embeddings : List [torch .Tensor ]) -> List [torch .Tensor ]:
213214 """Apply the CutEmbeddings dimensionality reduction to the train data."""
214215 # Cut the embeddings to the desired size
215- new_embeddings = [
216- embedding [: self .output_dimension_size ] for embedding in embeddings
217- ]
218- new_embeddings = torch .stack (new_embeddings ).numpy ()
219- if self .normalize_output :
220- self .normalization_lower_bound , self .normalization_upper_bound = (
221- _global_min_max (new_embeddings )
222- )
223- return torch .Tensor (
224- _normalize (
225- new_embeddings ,
226- min_value = self .normalization_lower_bound ,
227- max_value = self .normalization_upper_bound ,
228- )
229- )
216+ cut_embeddings = [embedding [: self .output_dimension_size ] for embedding in embeddings ]
217+
218+ if not self .normalize_output :
219+ return cut_embeddings
220+
221+ # Convert to numpy for normalization
222+ np_embeddings = torch .stack (cut_embeddings ).numpy ()
223+ self .normalization_lower_bound , self .normalization_upper_bound = (
224+ _global_min_max (np_embeddings )
225+ )
226+ normalized = _normalize (
227+ np_embeddings ,
228+ min_value = self .normalization_lower_bound ,
229+ max_value = self .normalization_upper_bound ,
230+ )
231+ return [torch .Tensor (x ) for x in normalized ]
230232
231233 def transform_new_points (
232234 self , new_embeddings : List [torch .Tensor ]
233235 ) -> List [torch .Tensor ]:
234236 """Apply the CutEmbeddings dimensionality reduction to the test data."""
235237 # Cut the new points to the desired size
236- new_embeddings = [
238+ cut_embeddings = [
237239 embedding [: self .output_dimension_size ] for embedding in new_embeddings
238240 ]
239- return [
240- torch .Tensor (
241- _normalize (
242- embedding ,
243- min_value = self .normalization_lower_bound ,
244- max_value = self .normalization_upper_bound ,
245- )
241+
242+ if not self .normalize_output :
243+ return cut_embeddings
244+
245+ # Convert to numpy for normalization
246+ normalized_results = []
247+ for embedding in cut_embeddings :
248+ np_embedding = embedding .numpy ()
249+ normalized = _normalize (
250+ np_embedding ,
251+ min_value = self .normalization_lower_bound ,
252+ max_value = self .normalization_upper_bound ,
246253 )
247- for embedding in new_embeddings
248- ]
254+ normalized_results . append ( torch . Tensor ( normalized ))
255+ return normalized_results
249256
250257
251258class Pca (DimensionalityReductionMethod ):
0 commit comments