@@ -11,6 +11,8 @@ cimport numpy as np
1111from libc.float cimport DBL_MAX
1212
1313from scipy.spatial.distance import cdist, pdist
14+ from dist_metrics import DistanceMetric
15+ from dist_metrics cimport DistanceMetric
1416
1517cpdef np.ndarray[np.double_t, ndim= 2 ] mst_linkage_core(
1618 np.ndarray[np.double_t, ndim= 2 ] distance_matrix):
@@ -80,7 +82,6 @@ cdef void select_distances(
8082
8183 result_buffer[i:n_labels] = pdist_matrix[current_labels[i:] - (row_num + 1 ) + row_start]
8284 return
83-
8485
8586cpdef np.ndarray[np.double_t, ndim= 2 ] mst_linkage_core_pdist(
8687 np.ndarray[np.double_t, ndim= 1 ] pdist_matrix):
@@ -127,29 +128,33 @@ cpdef np.ndarray[np.double_t, ndim=2] mst_linkage_core_pdist(
127128
128129 return result
129130
131+
130132cpdef np.ndarray[np.double_t, ndim= 2 ] mst_linkage_core_cdist(
131- np.ndarray raw_data,
133+ np.ndarray[np.double_t, ndim = 2 ] raw_data,
132134 np.ndarray[np.double_t, ndim= 1 ] core_distances,
133- object metric,
134- int p):
135+ DistanceMetric dist_metric):
135136
136- cdef np.ndarray[np.int64_t, ndim= 1 ] node_labels
137- cdef np.ndarray[np.int64_t, ndim= 1 ] current_labels
138- cdef np.ndarray[np.double_t, ndim= 1 ] current_distances
139- cdef np.ndarray[np.double_t, ndim= 1 ] current_core_distances
140- cdef np.ndarray[np.double_t, ndim= 1 ] left
141- # cdef np.ndarray[np.double_t, ndim=1] right
142- cdef np.ndarray[np.double_t, ndim= 2 ] result
137+ cdef np.ndarray[np.double_t, ndim= 1 ] current_distances_arr
138+ # cdef np.ndarray[np.double_t, ndim=1] left_arr
139+ cdef np.ndarray[np.int8_t, ndim= 1 ] in_tree_arr
140+ cdef np.ndarray[np.double_t, ndim= 2 ] result_arr
141+
142+ cdef np.double_t * current_distances
143+ cdef np.double_t * current_core_distances
144+ cdef np.double_t * raw_data_ptr
145+ # cdef np.double_t * left
146+ cdef np.int8_t * in_tree
147+ cdef np.double_t[:, ::1 ] raw_data_view
148+ cdef np.double_t[:, ::1 ] result
143149
144150 cdef np.ndarray label_filter
145151
146152 cdef long long current_node
147- cdef long long comparison_node
148- cdef long long new_node_index
149153 cdef long long new_node
150154 cdef long long i
151155 cdef long long j
152156 cdef long long dim
157+ cdef long long num_features
153158
154159 cdef double current_node_core_distance
155160 cdef double right_value
@@ -158,39 +163,49 @@ cpdef np.ndarray[np.double_t, ndim=2] mst_linkage_core_cdist(
158163 cdef double new_distance
159164
160165 dim = raw_data.shape[0 ]
166+ num_features = raw_data.shape[1 ]
161167
162- result = np.zeros((dim - 1 , 3 ))
163- node_labels = np.arange(dim, dtype = np.int64)
168+ raw_data_view = (< np.double_t [:raw_data.shape[0 ], :raw_data.shape[1 ]:1 ]> (< np.double_t * > raw_data.data))
169+ raw_data_ptr = (< np.double_t * > & raw_data_view[0 , 0 ])
170+
171+ result_arr = np.zeros((dim - 1 , 3 ))
172+ in_tree_arr = np.zeros(dim, dtype = np.int8)
164173 current_node = 0
165- current_distances = np.infty * np.ones(dim)
166- current_labels = node_labels
167- current_core_distances = core_distances
174+ current_distances_arr = np.infty * np.ones(dim)
168175
169- masked = 0
176+ result = (< np.double_t [:dim - 1 , :3 :1 ]> (< np.double_t * > result_arr.data))
177+ in_tree = (< np.int8_t * > in_tree_arr.data)
178+ current_distances = (< np.double_t * > current_distances_arr.data)
179+ current_core_distances = (< np.double_t * > core_distances.data)
180+ # raw_data_view = (<np.double_t [:raw_data.shape[0], :raw_data.shape[1]:1]> (<np.double_t *> raw_data.data))
170181
171182 for i in range (1 , dim):
172183
173- label_filter = current_labels ! = current_node
174- current_labels = current_labels[label_filter]
175- current_core_distances = current_core_distances[label_filter ]
184+ in_tree[current_node] = 1
185+
186+ # left_arr = cdist((raw_data_view[current_node],), raw_data, metric=metric, p=p)[0 ]
176187
177- left = cdist(raw_data[[ current_node]], raw_data, metric = metric, p = p)[ 0 ][current_labels] # good
188+ current_node_core_distance = current_core_distances[ current_node]
178189
179- current_distances = current_distances[label_filter]
180- current_node_core_distance = core_distances[current_node]
190+ # left = (<np.double_t *> left_arr.data)
181191
182192 new_distance = DBL_MAX
183193 new_node = 0
184194
185- for j in range (current_labels.shape[0 ]):
195+ for j in range (dim):
196+ if in_tree[j]:
197+ continue
198+
186199 right_value = current_distances[j]
187- left_value = left[j]
188- comparison_node = current_labels[j]
189- core_value = core_distances[comparison_node]
200+ # left_value = left[j]
201+ left_value = dist_metric.dist(& raw_data_ptr[num_features * current_node],
202+ & raw_data_ptr[num_features * j],
203+ num_features)
204+ core_value = core_distances[j]
190205 if current_node_core_distance > right_value or core_value > right_value or left_value > right_value:
191206 if right_value < new_distance:
192207 new_distance = right_value
193- new_node = current_labels[j]
208+ new_node = j
194209 continue
195210
196211 if core_value > current_node_core_distance:
@@ -204,81 +219,18 @@ cpdef np.ndarray[np.double_t, ndim=2] mst_linkage_core_cdist(
204219 current_distances[j] = left_value
205220 if left_value < new_distance:
206221 new_distance = left_value
207- new_node = current_labels[j]
222+ new_node = j
208223 else :
209224 if right_value < new_distance:
210225 new_distance = right_value
211- new_node = current_labels[j]
226+ new_node = j
212227
213228 result[i - 1 , 0 ] = < double > current_node
214229 result[i - 1 , 1 ] = < double > new_node
215230 result[i - 1 , 2 ] = new_distance
216231 current_node = new_node
217232
218- return result
219-
220-
221-
222- cdef tuple get_candidate(object kdtree, long long point_index, np.ndarray[np.double_t, ndim= 1 ] core_distances):
223-
224- point_core_distance = core_distances[point_index]
225- point = kdtree.data[point_index]
226- first_pass_candidates = kdtree.query_radius(point, point_core_distance)[0 ]
227-
228- candidate_core_distances = core_distances[first_pass_candidates]
229- best_candidate_index = candidate_core_distances.argmin()
230- second_pass_radius = candidate_core_distances[best_candidate_index]
231-
232- if second_pass_radius <= point_core_distance:
233- return first_pass_candidates[best_candidate_index], point_core_distance
234-
235- second_pass_candidates, second_pass_distances = kdtree.query_radius(point, second_pass_radius, return_distance = True )
236-
237- candidate_core_distances = core_distances[second_pass_candidates]
238-
239- candidate_reachability_distances = np.empty(len (second_pass_candidates), dtype = np.double)
240-
241- for i in range (len (second_pass_candidates)):
242- if candidate_core_distances[i] > second_pass_distances:
243- if candidate_core_distances[i] > point_core_distance:
244- candidate_reachability_distances[i] = second_pass_distances[i]
245- else :
246- candidate_reachability_distances[i] = point_core_distance
247- else :
248- if second_pass_distances[i] > point_core_distance:
249- candidate_reachability_distances[i] = second_pass_distances[i]
250- else :
251- candidate_reachability_distances[i] = point_core_distance
252-
253- best_candidate_index = candidate_reachability_distances.argmin()
254-
255- return (second_pass_candidates[best_candidate_index],
256- candidate_reachability_distances[best_candidate_index])
257-
258- # Started blocking this out, so hang onto it for now, but need considerably more work
259- # to make it correct (deal with removing and updating node candidates rom the queue)
260- # cpdef mst_linkage_core_kdtree(object kdtree, np.ndarray[np.double_t, ndim=1] core_distances):
261- #
262- # cdef long long dim
263- # cdef long long current_node
264- # cdef long long new_candidate
265- #
266- # dim = kdtree.data.shape[0]
267- # node_labels = np.arange(dim, dtype=np.int64)
268- # result = np.zeros((dim - 1, 3))
269- #
270- # current_node = 0
271- #
272- # for i in range(1, dim):
273- # new_candidate, new_distance = get_candidate(kdtree, current_node, core_distances)
274- # priority_queue.add(new_candidate, new_distance)
275- # new_node, new_distance = priority_queue.pop()
276- # result[i - 1, 0] = <double> current_node
277- # result[i - 1, 1] = <double> new_node
278- # result[i - 1, 2] = new_distance
279- # current_node = new_node
280- #
281- # return result
233+ return result_arr
282234
283235cdef class UnionFind (object ):
284236
0 commit comments