1- # cython: boundscheck=False, nonecheck=False, initializedcheck=False
1+ # cython: boundscheck=False
2+ # cython: nonecheck=False
3+ # cython: initializedcheck=False
24# mutual reachability distance computations
35# Authors: Leland McInnes
46# License: 3-clause BSD
@@ -11,51 +13,54 @@ from scipy.sparse import lil_matrix as sparse_matrix
1113from sklearn.neighbors import KDTree, BallTree
1214import gc
1315
16+
def mutual_reachability(distance_matrix, min_points=5, alpha=1.0):
    """Compute the weighted adjacency matrix of the mutual reachability
    graph of a distance matrix.

    Each sample's core distance is read from the unscaled distance matrix
    as the entry at sorted position ``min_points`` of its column. The
    returned matrix holds, for every pair ``(i, j)``, the largest of the
    two core distances and ``distance_matrix[i, j] / alpha``.

    Parameters
    ----------
    distance_matrix : ndarray, shape (n_samples, n_samples)
        Array of distances between samples. It is not modified.

    min_points : int, optional (default=5)
        The number of points in a neighbourhood for a point to be considered
        a core point. Clamped to at most ``n_samples - 1``.

    alpha : float, optional (default=1.0)
        Divisor applied to the pairwise distances before they are combined
        with the core distances. The core distances themselves are computed
        from the unscaled distances.

    Returns
    -------
    mutual_reachability : ndarray, shape (n_samples, n_samples)
        Weighted adjacency matrix of the mutual reachability graph.

    References
    ----------
    .. [1] Campello, R. J., Moulavi, D., & Sander, J. (2013, April).
       Density-based clustering based on hierarchical density estimates.
       In Pacific-Asia Conference on Knowledge Discovery and Data Mining
       (pp. 160-172). Springer Berlin Heidelberg.
    """
    size = distance_matrix.shape[0]
    # Never index past the last row of the sorted columns.
    min_points = min(size - 1, min_points)

    try:
        # A partial sort is enough: only the value at position min_points
        # of every column is needed.
        core_distances = np.partition(distance_matrix,
                                      min_points,
                                      axis=0)[min_points]
    except AttributeError:
        # Presumably a fallback for NumPy builds without np.partition;
        # a full sort yields the same row.
        core_distances = np.sort(distance_matrix,
                                 axis=0)[min_points]

    if alpha != 1.0:
        # Rebinds the local name to a new array; the caller's is untouched.
        distance_matrix = distance_matrix / alpha

    # stage1[i, j] = max(core_distances[j], distance_matrix[i, j])
    stage1 = np.where(core_distances > distance_matrix,
                      core_distances, distance_matrix)
    # Working on the transpose applies the same column-wise broadcast to the
    # other index, so the final entry also accounts for core_distances[i].
    result = np.where(core_distances > stage1.T,
                      core_distances.T, stage1.T).T
    return result
5760
58- cpdef sparse_mutual_reachability(object lil_matrix, np.intp_t min_points = 5 , float alpha = 1.0 ):
61+
62+ cpdef sparse_mutual_reachability(object lil_matrix, np.intp_t min_points = 5 ,
63+ float alpha = 1.0 ):
5964
6065 cdef np.intp_t i
6166 cdef np.intp_t j
@@ -88,7 +93,9 @@ cpdef sparse_mutual_reachability(object lil_matrix, np.intp_t min_points=5, floa
8893
8994 return result.tocsr()
9095
91- def kdtree_mutual_reachability (X , distance_matrix , metric , p = 2 , min_points = 5 , alpha = 1.0 , **kwargs ):
96+
97+ def kdtree_mutual_reachability (X , distance_matrix , metric , p = 2 , min_points = 5 ,
98+ alpha = 1.0 , **kwargs ):
9299 dim = distance_matrix.shape[0 ]
93100 min_points = min (dim - 1 , min_points)
94101
@@ -97,7 +104,7 @@ def kdtree_mutual_reachability(X, distance_matrix, metric, p=2, min_points=5, al
97104 else :
98105 tree = KDTree(X, metric = metric, ** kwargs)
99106
100- core_distances = tree.query(X, k = min_points)[0 ][:,- 1 ]
107+ core_distances = tree.query(X, k = min_points)[0 ][:, - 1 ]
101108
102109 if alpha != 1.0 :
103110 distance_matrix = distance_matrix / alpha
@@ -108,13 +115,15 @@ def kdtree_mutual_reachability(X, distance_matrix, metric, p=2, min_points=5, al
108115 core_distances.T, stage1.T).T
109116 return result
110117
111- def balltree_mutual_reachability (X , distance_matrix , metric , p = 2 , min_points = 5 , alpha = 1.0 , **kwargs ):
118+
119+ def balltree_mutual_reachability (X , distance_matrix , metric , p = 2 , min_points = 5 ,
120+ alpha = 1.0 , **kwargs ):
112121 dim = distance_matrix.shape[0 ]
113122 min_points = min (dim - 1 , min_points)
114123
115124 tree = BallTree(X, metric = metric, ** kwargs)
116125
117- core_distances = tree.query(X, k = min_points)[0 ][:,- 1 ]
126+ core_distances = tree.query(X, k = min_points)[0 ][:, - 1 ]
118127
119128 if alpha != 1.0 :
120129 distance_matrix = distance_matrix / alpha
@@ -125,8 +134,10 @@ def balltree_mutual_reachability(X, distance_matrix, metric, p=2, min_points=5,
125134 core_distances.T, stage1.T).T
126135 return result
127136
137+
128138cdef np.ndarray[np.double_t, ndim= 1 ] mutual_reachability_from_pdist(
129- np.ndarray[np.double_t, ndim= 1 ] core_distances, np.ndarray[np.double_t, ndim= 1 ] dists, np.intp_t dim):
139+ np.ndarray[np.double_t, ndim= 1 ] core_distances,
140+ np.ndarray[np.double_t, ndim= 1 ] dists, np.intp_t dim):
130141
131142 cdef np.intp_t i
132143 cdef np.intp_t j
@@ -148,7 +159,8 @@ cdef np.ndarray[np.double_t, ndim=1] mutual_reachability_from_pdist(
148159 return dists
149160
150161
151- def kdtree_pdist_mutual_reachability (X , metric , p = 2 , min_points = 5 , alpha = 1.0 , **kwargs ):
162+ def kdtree_pdist_mutual_reachability (X , metric , p = 2 , min_points = 5 , alpha = 1.0 ,
163+ **kwargs ):
152164
153165 dim = X.shape[0 ]
154166 min_points = min (dim - 1 , min_points)
@@ -158,7 +170,7 @@ def kdtree_pdist_mutual_reachability(X, metric, p=2, min_points=5, alpha=1.0, *
158170 else :
159171 tree = KDTree(X, metric = metric, ** kwargs)
160172
161- core_distances = tree.query(X, k = min_points)[0 ][:,- 1 ]
173+ core_distances = tree.query(X, k = min_points)[0 ][:, - 1 ]
162174
163175 del tree
164176 gc.collect()
@@ -172,14 +184,16 @@ def kdtree_pdist_mutual_reachability(X, metric, p=2, min_points=5, alpha=1.0, *
172184
173185 return dists
174186
175- def balltree_pdist_mutual_reachability (X , metric , p = 2 , min_points = 5 , alpha = 1.0 , **kwargs ):
187+
188+ def balltree_pdist_mutual_reachability (X , metric , p = 2 , min_points = 5 , alpha = 1.0 ,
189+ **kwargs ):
176190
177191 dim = X.shape[0 ]
178192 min_points = min (dim - 1 , min_points)
179193
180194 tree = BallTree(X, metric = metric, ** kwargs)
181195
182- core_distances = tree.query(X, k = min_points)[0 ][:,- 1 ]
196+ core_distances = tree.query(X, k = min_points)[0 ][:, - 1 ]
183197
184198 del tree
185199 gc.collect()
0 commit comments