@@ -51,19 +51,19 @@ def _get_leaves(condensed_tree):
51
51
class CondensedTree (object ):
52
52
"""The condensed tree structure, which provides a simplified or smoothed version
53
53
of the :class:`~hdbscan.plots.SingleLinkageTree`.
54
-
54
+
55
55
Parameters
56
56
----------
57
57
condensed_tree_array : numpy recarray from :class:`~hdbscan.HDBSCAN`
58
58
The raw numpy rec array version of the condensed tree as produced
59
59
internally by hdbscan.
60
-
60
+
61
61
cluster_selection_method : string, optional (default 'eom')
62
62
The method of selecting clusters. One of 'eom' or 'leaf'
63
63
64
64
allow_single_cluster : Boolean, optional (default False)
65
65
Whether to allow the root cluster as the only selected cluster
66
-
66
+
67
67
"""
68
68
def __init__ (self , condensed_tree_array , cluster_selection_method = 'eom' ,
69
69
allow_single_cluster = False ):
@@ -75,20 +75,20 @@ def get_plot_data(self,
75
75
leaf_separation = 1 ,
76
76
log_size = False ,
77
77
max_rectangle_per_icicle = 20 ):
78
- """Generates data for use in plotting the 'icicle plot' or dendrogram
78
+ """Generates data for use in plotting the 'icicle plot' or dendrogram
79
79
plot of the condensed tree generated by HDBSCAN.
80
80
81
81
Parameters
82
82
----------
83
83
leaf_separation : float, optional
84
- How far apart to space the final leaves of the
84
+ How far apart to space the final leaves of the
85
85
dendrogram. (default 1)
86
86
87
87
log_size : boolean, optional
88
88
Use log scale for the 'size' of clusters (i.e. number of
89
89
points in the cluster at a given lambda value).
90
90
(default False)
91
-
91
+
92
92
max_rectangle_per_icicle : int, optional
93
93
To simplify the plot this method will only emit
94
94
``max_rectangle_per_icicle`` bars per branch of the dendrogram.
@@ -104,7 +104,7 @@ def get_plot_data(self,
104
104
`bar_bottoms` y coordinate of bottoms of bars
105
105
`bar_widths` widths of the bars (in x coord scale)
106
106
`bar_bounds` a 4-tuple of [left, right, bottom, top]
107
- giving the bounds on a full set of
107
+ giving the bounds on a full set of
108
108
cluster bars
109
109
Data associated with cluster splits:
110
110
`line_xs` x coordinates for horizontal dendrogram lines
@@ -135,8 +135,8 @@ def get_plot_data(self,
135
135
cluster_y_coords [left_child ] = split ['lambda_val' ][0 ]
136
136
cluster_y_coords [right_child ] = split ['lambda_val' ][1 ]
137
137
138
- # We use bars to plot the 'icicles', so we need to generate centers, tops,
139
- # bottoms and widths for each rectangle. We can go through each cluster
138
+ # We use bars to plot the 'icicles', so we need to generate centers, tops,
139
+ # bottoms and widths for each rectangle. We can go through each cluster
140
140
# and do this for each in turn.
141
141
bar_centers = []
142
142
bar_tops = []
@@ -279,7 +279,7 @@ def plot(self, leaf_separation=1, cmap='viridis', select_clusters=False,
279
279
Parameters
280
280
----------
281
281
leaf_separation : float, optional (default 1)
282
- How far apart to space the final leaves of the
282
+ How far apart to space the final leaves of the
283
283
dendrogram.
284
284
285
285
cmap : string or matplotlib colormap, optional (default viridis)
@@ -315,7 +315,7 @@ def plot(self, leaf_separation=1, cmap='viridis', select_clusters=False,
315
315
Use log scale for the 'size' of clusters (i.e. number of
316
316
points in the cluster at a given lambda value).
317
317
318
-
318
+
319
319
max_rectangles_per_icicle : int, optional (default 20)
320
320
To simplify the plot this method will only emit
321
321
``max_rectangles_per_icicle`` bars per branch of the dendrogram.
@@ -359,8 +359,14 @@ def plot(self, leaf_separation=1, cmap='viridis', select_clusters=False,
359
359
linewidth = 0
360
360
)
361
361
362
+ drawlines = []
362
363
for xs , ys in zip (plot_data ['line_xs' ], plot_data ['line_ys' ]):
363
- axis .plot (xs , ys , color = 'black' , linewidth = 1 )
364
+ drawlines .append (xs )
365
+ drawlines .append (ys )
366
+ axis .plot (* drawlines , color = 'black' , linewidth = 1 )
367
+ # for xs, ys in zip(plot_data['line_xs'], plot_data['line_ys']):
368
+ # axis.plot(xs, ys, color='black', linewidth=1)
369
+
364
370
if select_clusters :
365
371
try :
366
372
from matplotlib .patches import Ellipse
@@ -426,10 +432,10 @@ def to_pandas(self):
426
432
427
433
Each row of the dataframe corresponds to an edge in the tree.
428
434
The columns of the dataframe are `parent`, `child`, `lambda_val`
429
- and `child_size`.
435
+ and `child_size`.
430
436
431
437
The `parent` and `child` are the ids of the
432
- parent and child nodes in the tree. Node ids less than the number
438
+ parent and child nodes in the tree. Node ids less than the number
433
439
of points in the original dataset represent individual points, while
434
440
ids greater than the number of points are clusters.
435
441
@@ -482,13 +488,13 @@ def _line_width(y, linkage):
482
488
class SingleLinkageTree (object ):
483
489
"""A single linkage format dendrogram tree, with plotting functionality
484
490
and networkX support.
485
-
491
+
486
492
Parameters
487
493
----------
488
494
linkage : ndarray (n_samples, 4)
489
495
The numpy array that holds the tree structure. As output by
490
496
scipy.cluster.hierarchy, hdbscan, or fastcluster.
491
-
497
+
492
498
"""
493
499
def __init__ (self , linkage ):
494
500
self ._linkage = linkage
@@ -500,29 +506,29 @@ def plot(self, axis=None, truncate_mode=None, p=0, vary_line_width=True,
500
506
Parameters
501
507
----------
502
508
truncate_mode : str, optional
503
- The dendrogram can be hard to read when the original
504
- observation matrix from which the linkage is derived
505
- is large. Truncation is used to condense the dendrogram.
509
+ The dendrogram can be hard to read when the original
510
+ observation matrix from which the linkage is derived
511
+ is large. Truncation is used to condense the dendrogram.
506
512
There are several modes:
507
513
508
514
``None/'none'``
509
515
No truncation is performed (Default).
510
-
516
+
511
517
``'lastp'``
512
- The last p non-singleton formed in the linkage are the only
513
- non-leaf nodes in the linkage; they correspond to rows
514
- Z[n-p-2:end] in Z. All other non-singleton clusters are
518
+ The last p non-singleton clusters formed in the linkage are the only
519
+ non-leaf nodes in the linkage; they correspond to rows
520
+ Z[n-p-2:end] in Z. All other non-singleton clusters are
515
521
contracted into leaf nodes.
516
522
517
523
``'level'/'mtica'``
518
- No more than p levels of the dendrogram tree are displayed.
524
+ No more than p levels of the dendrogram tree are displayed.
519
525
This corresponds to Mathematica(TM) behavior.
520
526
521
527
p : int, optional
522
528
The ``p`` parameter for ``truncate_mode``.
523
529
524
530
vary_line_width : boolean, optional
525
- Draw downward branches of the dendrogram with line thickness that
531
+ Draw downward branches of the dendrogram with line thickness that
526
532
varies depending on the size of the cluster.
527
533
528
534
cmap : string or matplotlib colormap, optional
@@ -617,11 +623,11 @@ def to_pandas(self):
617
623
"""Return a pandas dataframe representation of the single linkage tree.
618
624
619
625
Each row of the dataframe corresponds to an edge in the tree.
620
- The columns of the dataframe are `parent`, `left_child`,
626
+ The columns of the dataframe are `parent`, `left_child`,
621
627
`right_child`, `distance` and `size`.
622
628
623
629
The `parent`, `left_child` and `right_child` are the ids of the
624
- parent and child nodes in the tree. Node ids less than the number
630
+ parent and child nodes in the tree. Node ids less than the number
625
631
of points in the original dataset represent individual points, while
626
632
ids greater than the number of points are clusters.
627
633
@@ -731,7 +737,7 @@ def plot(self, axis=None, node_size=40, node_color='k',
731
737
(default 0.8).
732
738
733
739
edge_cmap : matplotlib colormap, optional
734
- The colormap to color edges by (varying color by edge
740
+ The colormap to color edges by (varying color by edge
735
741
weight/distance). Can be a cmap object or a string
736
742
recognised by matplotlib. (default `viridis_r`)
737
743
0 commit comments