Skip to content

Commit eb1993b

Browse files
committed
Merge remote-tracking branch 'origin/master'
# Conflicts:
#	.idea/codeStyleSettings.xml
2 parents 1d2a9e0 + f47af2f commit eb1993b

File tree

13 files changed

+18786
-16529
lines changed

13 files changed

+18786
-16529
lines changed

.idea/codeStyleSettings.xml

Lines changed: 1 addition & 5 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

.idea/hdbscan.iml

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

.idea/misc.xml

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

hdbscan/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,2 +1,4 @@
11
from .hdbscan_ import HDBSCAN, hdbscan
22
from .robust_single_linkage_ import RobustSingleLinkage, robust_single_linkage
3+
from .validity import validity_index
4+

hdbscan/_hdbscan_boruvka.c

Lines changed: 7175 additions & 6384 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

hdbscan/_hdbscan_linkage.c

Lines changed: 3625 additions & 3329 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

hdbscan/_hdbscan_reachability.c

Lines changed: 781 additions & 773 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

hdbscan/_hdbscan_tree.c

Lines changed: 5294 additions & 4550 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

hdbscan/_hdbscan_tree.pyx

Lines changed: 20 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -373,12 +373,14 @@ cdef np.ndarray[np.intp_t, ndim=1] do_labelling(
373373
np.ndarray tree,
374374
set clusters,
375375
dict cluster_label_map,
376-
np.intp_t allow_single_cluster):
376+
np.intp_t allow_single_cluster,
377+
np.intp_t match_reference_implementation):
377378

378379
cdef np.intp_t root_cluster
379380
cdef np.ndarray[np.intp_t, ndim=1] result_arr
380381
cdef np.ndarray[np.intp_t, ndim=1] parent_array
381382
cdef np.ndarray[np.intp_t, ndim=1] child_array
383+
cdef np.ndarray[np.double_t, ndim=1] lambda_array
382384
cdef np.intp_t *result
383385
cdef TreeUnionFind union_find
384386
cdef np.intp_t parent
@@ -388,6 +390,7 @@ cdef np.ndarray[np.intp_t, ndim=1] do_labelling(
388390

389391
child_array = tree['child']
390392
parent_array = tree['parent']
393+
lambda_array = tree['lambda_val']
391394

392395
root_cluster = parent_array.min()
393396
result_arr = np.empty(root_cluster, dtype=np.intp)
@@ -406,14 +409,22 @@ cdef np.ndarray[np.intp_t, ndim=1] do_labelling(
406409
if cluster < root_cluster:
407410
result[n] = -1
408411
elif cluster == root_cluster:
409-
if len(clusters) == 1 and \
412+
if len(clusters) == 1 and allow_single_cluster and \
410413
tree['lambda_val'][tree['child'] == n] >= \
411414
tree['lambda_val'][tree['parent'] == cluster].max():
412415
result[n] = cluster_label_map[cluster]
413416
else:
414417
result[n] = -1
415418
else:
416-
result[n] = cluster_label_map[cluster]
419+
if match_reference_implementation:
420+
point_lambda = lambda_array[child_array == n][0]
421+
cluster_lambda = lambda_array[child_array == cluster][0]
422+
if point_lambda > cluster_lambda:
423+
result[n] = cluster_label_map[cluster]
424+
else:
425+
result[n] = -1
426+
else:
427+
result[n] = cluster_label_map[cluster]
417428

418429
return result_arr
419430

@@ -525,7 +536,8 @@ cpdef np.ndarray get_stability_scores(np.ndarray labels, set clusters,
525536

526537

527538
cpdef tuple get_clusters(np.ndarray tree, dict stability,
528-
allow_single_cluster=False):
539+
allow_single_cluster=False,
540+
match_reference_implementation=False):
529541
"""
530542
The tree is assumed to have numeric node ids such that a reverse numeric
531543
sort is equivalent to a topological sort.
@@ -571,10 +583,11 @@ cpdef tuple get_clusters(np.ndarray tree, dict stability,
571583
is_cluster[sub_node] = False
572584

573585
clusters = set([c for c in is_cluster if is_cluster[c]])
574-
cluster_map = {c: n for n, c in enumerate(clusters)}
575-
reverse_cluster_map = {n: c for n, c in enumerate(clusters)}
586+
cluster_map = {c: n for n, c in enumerate(sorted(list(clusters)))}
587+
reverse_cluster_map = {n: c for c, n in cluster_map.items()}
576588

577-
labels = do_labelling(tree, clusters, cluster_map, allow_single_cluster)
589+
labels = do_labelling(tree, clusters, cluster_map,
590+
allow_single_cluster, match_reference_implementation)
578591
probs = get_probabilities(tree, reverse_cluster_map, labels)
579592
stabilities = get_stability_scores(labels, clusters, stability, max_lambda)
580593

0 commit comments

Comments
 (0)