1- # cython: boundscheck=False, nonecheck=False
1+ # cython: boundscheck=False
2+ # cython: nonecheck=False
23# Minimum spanning tree single linkage implementation for hdbscan
34# Authors: Leland McInnes, Steve Astels
45# License: 3-clause BSD
@@ -10,8 +11,10 @@ from libc.float cimport DBL_MAX
1011
1112from dist_metrics cimport DistanceMetric
1213
14+
1315cpdef np.ndarray[np.double_t, ndim= 2 ] mst_linkage_core(
14- np.ndarray[np.double_t, ndim= 2 ] distance_matrix):
16+ np.ndarray[np.double_t,
17+ ndim= 2 ] distance_matrix):
1518
1619 cdef np.ndarray[np.intp_t, ndim= 1 ] node_labels
1720 cdef np.ndarray[np.intp_t, ndim= 1 ] current_labels
@@ -32,7 +35,7 @@ cpdef np.ndarray[np.double_t, ndim=2] mst_linkage_core(
3235 current_node = 0
3336 current_distances = np.infty * np.ones(distance_matrix.shape[0 ])
3437 current_labels = node_labels
35- for i in range (1 ,node_labels.shape[0 ]):
38+ for i in range (1 , node_labels.shape[0 ]):
3639 label_filter = current_labels != current_node
3740 current_labels = current_labels[label_filter]
3841 left = current_distances[label_filter]
@@ -48,12 +51,14 @@ cpdef np.ndarray[np.double_t, ndim=2] mst_linkage_core(
4851
4952 return result
5053
54+
5155cpdef np.ndarray[np.double_t, ndim= 2 ] mst_linkage_core_vector(
52- np.ndarray[np.double_t, ndim= 2 , mode= ' c' ] raw_data,
53- np.ndarray[np.double_t, ndim= 1 , mode= ' c' ] core_distances,
54- DistanceMetric dist_metric,
55- np.double_t alpha = 1.0 ):
56+ np.ndarray[np.double_t, ndim= 2 , mode= ' c' ] raw_data,
57+ np.ndarray[np.double_t, ndim= 1 , mode= ' c' ] core_distances,
58+ DistanceMetric dist_metric,
59+ np.double_t alpha = 1.0 ):
5660
61+ # Typed NumPy arrays; raw pointers / memoryviews into their buffers are taken further down
5762 cdef np.ndarray[np.double_t, ndim= 1 ] current_distances_arr
5863 cdef np.ndarray[np.int8_t, ndim= 1 ] in_tree_arr
5964 cdef np.ndarray[np.double_t, ndim= 2 ] result_arr
@@ -83,15 +88,16 @@ cpdef np.ndarray[np.double_t, ndim=2] mst_linkage_core_vector(
8388 dim = raw_data.shape[0 ]
8489 num_features = raw_data.shape[1 ]
8590
86- raw_data_view = (< np.double_t [:raw_data.shape[0 ], :raw_data.shape[1 ]:1 ]> (< np.double_t * > raw_data.data))
91+ raw_data_view = (< np.double_t[:raw_data.shape[0 ], :raw_data.shape[1 ]:1 ]> (
92+ < np.double_t * > raw_data.data))
8793 raw_data_ptr = (< np.double_t * > & raw_data_view[0 , 0 ])
8894
8995 result_arr = np.zeros((dim - 1 , 3 ))
9096 in_tree_arr = np.zeros(dim, dtype = np.int8)
9197 current_node = 0
9298 current_distances_arr = np.infty * np.ones(dim)
9399
94- result = (< np.double_t [:dim - 1 , :3 :1 ]> (< np.double_t * > result_arr.data))
100+ result = (< np.double_t[:dim - 1 , :3 :1 ]> (< np.double_t * > result_arr.data))
95101 in_tree = (< np.int8_t * > in_tree_arr.data)
96102 current_distances = (< np.double_t * > current_distances_arr.data)
97103 current_core_distances = (< np.double_t * > core_distances.data)
@@ -110,15 +116,18 @@ cpdef np.ndarray[np.double_t, ndim=2] mst_linkage_core_vector(
110116 continue
111117
112118 right_value = current_distances[j]
113- left_value = dist_metric.dist(& raw_data_ptr[num_features * current_node],
119+ left_value = dist_metric.dist(& raw_data_ptr[num_features *
120+ current_node],
114121 & raw_data_ptr[num_features * j],
115122 num_features)
116123
117124 if alpha != 1.0 :
118125 left_value /= alpha
119126
120127 core_value = core_distances[j]
121- if current_node_core_distance > right_value or core_value > right_value or left_value > right_value:
128+ if (current_node_core_distance > right_value or
129+ core_value > right_value or
130+ left_value > right_value):
122131 if right_value < new_distance:
123132 new_distance = right_value
124133 new_node = j
@@ -148,6 +157,7 @@ cpdef np.ndarray[np.double_t, ndim=2] mst_linkage_core_vector(
148157
149158 return result_arr
150159
160+
151161cdef class UnionFind (object ):
152162
153163 cdef np.ndarray parent_arr
@@ -183,6 +193,7 @@ cdef class UnionFind (object):
183193 p, self .parent_arr[p] = self .parent_arr[p], n
184194 return n
185195
196+
186197cpdef np.ndarray[np.double_t, ndim= 2 ] label(np.ndarray[np.double_t, ndim= 2 ] L):
187198
188199 cdef np.ndarray[np.double_t, ndim= 2 ] result_arr
@@ -192,7 +203,8 @@ cpdef np.ndarray[np.double_t, ndim=2] label(np.ndarray[np.double_t, ndim=2] L):
192203 cdef np.double_t delta
193204
194205 result_arr = np.zeros((L.shape[0 ], L.shape[1 ] + 1 ))
195- result = (< np.double_t[:L.shape[0 ], :4 :1 ]> (< np.double_t * > result_arr.data))
206+ result = (< np.double_t[:L.shape[0 ], :4 :1 ]> (
207+ < np.double_t * > result_arr.data))
196208 N = L.shape[0 ] + 1
197209 U = UnionFind(N)
198210
@@ -208,18 +220,18 @@ cpdef np.ndarray[np.double_t, ndim=2] label(np.ndarray[np.double_t, ndim=2] L):
208220 result[index][1 ] = bb
209221 result[index][2 ] = delta
210222 result[index][3 ] = U.size[aa] + U.size[bb]
211-
223+
212224 U.union(aa, bb)
213-
225+
214226 return result_arr
215227
228+
216229cpdef np.ndarray[np.double_t, ndim= 2 ] single_linkage(distance_matrix):
217-
230+
218231 cdef np.ndarray[np.double_t, ndim= 2 ] hierarchy
219232 cdef np.ndarray[np.double_t, ndim= 2 ] for_labelling
220-
233+
221234 hierarchy = mst_linkage_core(distance_matrix)
222235 for_labelling = hierarchy[np.argsort(hierarchy.T[2 ]), :]
223- return label(for_labelling)
224236
225-
237+ return label(for_labelling)
0 commit comments