11"""
22Measures for estimating the information density of a given sample.
33"""
4- from typing import Callable
4+ from typing import Callable , Union
55
66import numpy as np
77from scipy .spatial .distance import cosine , euclidean
8+ from sklearn .metrics .pairwise import pairwise_distances
89
910from modAL .utils .data import modALinput
1011
@@ -29,13 +30,13 @@ def sim(*args, **kwargs):
2930euclidean_similarity = similarize_distance (euclidean )
3031
3132
32- def information_density (X : modALinput , similarity_measure : Callable = cosine_similarity ) -> np .ndarray :
33+ def information_density (X : modALinput , metric : Union [ str , Callable ] = 'euclidean' ) -> np .ndarray :
3334 """
34- Calculates the information density metric of the given data using the similarity measure given.
35+ Calculates the information density of the given data: for each sample, the mean over all samples of the similarity 1 / (1 + distance), with distances computed using the given metric.
3536
3637 Args:
3738 X: The data for which the information density is to be calculated.
38- similarity_measure : The similarity measure to be used. Should take two 1d numpy.ndarrays for argument.
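39+ metric: The distance metric to be used, passed on to pairwise_distances. Either the name of a metric as a string (default 'euclidean') or a callable taking two 1d numpy.ndarrays and returning their distance.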
3940
4041 Todo:
4142 Should work with all possible modALinput.
@@ -44,8 +45,12 @@ def information_density(X: modALinput, similarity_measure: Callable = cosine_sim
4445 Returns:
4546 The information density for each sample.
4647 """
47- inf_density = np .zeros (shape = (X .shape [0 ],))
48- for X_idx , X_inst in enumerate (X ):
49- inf_density [X_idx ] = sum (similarity_measure (X_inst , X_j ) for X_j in X )
48+ # inf_density = np.zeros(shape=(X.shape[0],))
49+ # for X_idx, X_inst in enumerate(X):
50+ # inf_density[X_idx] = sum(similarity_measure(X_inst, X_j) for X_j in X)
51+ #
52+ # return inf_density/X.shape[0]
5053
51- return inf_density / X .shape [0 ]
54+ similarity_mtx = 1 / (1 + pairwise_distances (X , X , metric = metric ))
55+
56+ return similarity_mtx .mean (axis = 1 )
0 commit comments