Skip to content

Commit d0869a5

Browse files
authored
Merge pull request #176 from thclark/master
Fixed pathologically slow looped plotting routine.
2 parents 23a11a0 + 37835ab commit d0869a5

File tree

1 file changed

+34
-28
lines changed

1 file changed

+34
-28
lines changed

hdbscan/plots.py

Lines changed: 34 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -51,19 +51,19 @@ def _get_leaves(condensed_tree):
5151
class CondensedTree(object):
5252
"""The condensed tree structure, which provides a simplified or smoothed version
5353
of the :class:`~hdbscan.plots.SingleLinkageTree`.
54-
54+
5555
Parameters
5656
----------
5757
condensed_tree_array : numpy recarray from :class:`~hdbscan.HDBSCAN`
5858
The raw numpy rec array version of the condensed tree as produced
5959
internally by hdbscan.
60-
60+
6161
cluster_selection_method : string, optional (default 'eom')
6262
The method of selecting clusters. One of 'eom' or 'leaf'
6363
6464
allow_single_cluster : Boolean, optional (default False)
6565
Whether to allow the root cluster as the only selected cluster
66-
66+
6767
"""
6868
def __init__(self, condensed_tree_array, cluster_selection_method='eom',
6969
allow_single_cluster=False):
@@ -75,20 +75,20 @@ def get_plot_data(self,
7575
leaf_separation=1,
7676
log_size=False,
7777
max_rectangle_per_icicle=20):
78-
"""Generates data for use in plotting the 'icicle plot' or dendrogram
78+
"""Generates data for use in plotting the 'icicle plot' or dendrogram
7979
plot of the condensed tree generated by HDBSCAN.
8080
8181
Parameters
8282
----------
8383
leaf_separation : float, optional
84-
How far apart to space the final leaves of the
84+
How far apart to space the final leaves of the
8585
dendrogram. (default 1)
8686
8787
log_size : boolean, optional
8888
Use log scale for the 'size' of clusters (i.e. number of
8989
points in the cluster at a given lambda value).
9090
(default False)
91-
91+
9292
max_rectangle_per_icicle : int, optional
9393
To simplify the plot this method will only emit
9494
``max_rectangle_per_icicle`` bars per branch of the dendrogram.
@@ -104,7 +104,7 @@ def get_plot_data(self,
104104
`bar_bottoms` y coordinate of bottoms of bars
105105
`bar_widths` widths of the bars (in x coord scale)
106106
`bar_bounds` a 4-tuple of [left, right, bottom, top]
107-
giving the bounds on a full set of
107+
giving the bounds on a full set of
108108
cluster bars
109109
Data associated with cluster splits:
110110
`line_xs` x coordinates for horizontal dendrogram lines
@@ -135,8 +135,8 @@ def get_plot_data(self,
135135
cluster_y_coords[left_child] = split['lambda_val'][0]
136136
cluster_y_coords[right_child] = split['lambda_val'][1]
137137

138-
# We use bars to plot the 'icicles', so we need to generate centers, tops,
139-
# bottoms and widths for each rectangle. We can go through each cluster
138+
# We use bars to plot the 'icicles', so we need to generate centers, tops,
139+
# bottoms and widths for each rectangle. We can go through each cluster
140140
# and do this for each in turn.
141141
bar_centers = []
142142
bar_tops = []
@@ -279,7 +279,7 @@ def plot(self, leaf_separation=1, cmap='viridis', select_clusters=False,
279279
Parameters
280280
----------
281281
leaf_separation : float, optional (default 1)
282-
How far apart to space the final leaves of the
282+
How far apart to space the final leaves of the
283283
dendrogram.
284284
285285
cmap : string or matplotlib colormap, optional (default viridis)
@@ -315,7 +315,7 @@ def plot(self, leaf_separation=1, cmap='viridis', select_clusters=False,
315315
Use log scale for the 'size' of clusters (i.e. number of
316316
points in the cluster at a given lambda value).
317317
318-
318+
319319
max_rectangles_per_icicle : int, optional (default 20)
320320
To simplify the plot this method will only emit
321321
``max_rectangles_per_icicle`` bars per branch of the dendrogram.
@@ -359,8 +359,14 @@ def plot(self, leaf_separation=1, cmap='viridis', select_clusters=False,
359359
linewidth=0
360360
)
361361

362+
drawlines = []
362363
for xs, ys in zip(plot_data['line_xs'], plot_data['line_ys']):
363-
axis.plot(xs, ys, color='black', linewidth=1)
364+
drawlines.append(xs)
365+
drawlines.append(ys)
366+
axis.plot(*drawlines, color='black', linewidth=1)
367+
# for xs, ys in zip(plot_data['line_xs'], plot_data['line_ys']):
368+
# axis.plot(xs, ys, color='black', linewidth=1)
369+
364370
if select_clusters:
365371
try:
366372
from matplotlib.patches import Ellipse
@@ -426,10 +432,10 @@ def to_pandas(self):
426432
427433
Each row of the dataframe corresponds to an edge in the tree.
428434
The columns of the dataframe are `parent`, `child`, `lambda_val`
429-
and `child_size`.
435+
and `child_size`.
430436
431437
The `parent` and `child` are the ids of the
432-
parent and child nodes in the tree. Node ids less than the number
438+
parent and child nodes in the tree. Node ids less than the number
433439
of points in the original dataset represent individual points, while
434440
ids greater than the number of points are clusters.
435441
@@ -482,13 +488,13 @@ def _line_width(y, linkage):
482488
class SingleLinkageTree(object):
483489
"""A single linkage format dendrogram tree, with plotting functionality
484490
and networkX support.
485-
491+
486492
Parameters
487493
----------
488494
linkage : ndarray (n_samples, 4)
489495
The numpy array that holds the tree structure. As output by
490496
scipy.cluster.hierarchy, hdbscan, or fastcluster.
491-
497+
492498
"""
493499
def __init__(self, linkage):
494500
self._linkage = linkage
@@ -500,29 +506,29 @@ def plot(self, axis=None, truncate_mode=None, p=0, vary_line_width=True,
500506
Parameters
501507
----------
502508
truncate_mode : str, optional
503-
The dendrogram can be hard to read when the original
504-
observation matrix from which the linkage is derived
505-
is large. Truncation is used to condense the dendrogram.
509+
The dendrogram can be hard to read when the original
510+
observation matrix from which the linkage is derived
511+
is large. Truncation is used to condense the dendrogram.
506512
There are several modes:
507513
508514
``None/'none'``
509515
No truncation is performed (Default).
510-
516+
511517
``'lastp'``
512-
The last p non-singleton formed in the linkage are the only
513-
non-leaf nodes in the linkage; they correspond to rows
514-
Z[n-p-2:end] in Z. All other non-singleton clusters are
518+
The last p non-singleton clusters formed in the linkage are the only
519+
non-leaf nodes in the linkage; they correspond to rows
520+
Z[n-p-2:end] in Z. All other non-singleton clusters are
515521
contracted into leaf nodes.
516522
517523
``'level'/'mtica'``
518-
No more than p levels of the dendrogram tree are displayed.
524+
No more than p levels of the dendrogram tree are displayed.
519525
This corresponds to Mathematica(TM) behavior.
520526
521527
p : int, optional
522528
The ``p`` parameter for ``truncate_mode``.
523529
524530
vary_line_width : boolean, optional
525-
Draw downward branches of the dendrogram with line thickness that
531+
Draw downward branches of the dendrogram with line thickness that
526532
varies depending on the size of the cluster.
527533
528534
cmap : string or matplotlib colormap, optional
@@ -617,11 +623,11 @@ def to_pandas(self):
617623
"""Return a pandas dataframe representation of the single linkage tree.
618624
619625
Each row of the dataframe corresponds to an edge in the tree.
620-
The columns of the dataframe are `parent`, `left_child`,
626+
The columns of the dataframe are `parent`, `left_child`,
621627
`right_child`, `distance` and `size`.
622628
623629
The `parent`, `left_child` and `right_child` are the ids of the
624-
parent and child nodes in the tree. Node ids less than the number
630+
parent and child nodes in the tree. Node ids less than the number
625631
of points in the original dataset represent individual points, while
626632
ids greater than the number of points are clusters.
627633
@@ -731,7 +737,7 @@ def plot(self, axis=None, node_size=40, node_color='k',
731737
(default 0.8).
732738
733739
edge_cmap : matplotlib colormap, optional
734-
The colormap to color edges by (varying color by edge
740+
The colormap to color edges by (varying color by edge
735741
weight/distance). Can be a cmap object or a string
736742
recognised by matplotlib. (default `viridis_r`)
737743

0 commit comments

Comments
 (0)