@@ -210,72 +210,6 @@ def get_plot_data(self,
210210 'cluster_bounds' : cluster_bounds
211211 }
212212
def get_simple_plot_data(self, leaf_separation=1, log_size=False,
                         max_rectangle_per_icicle=20):
    """Compute cluster start coordinates for a simplified 'icicle plot'
    (dendrogram plot) of the condensed tree generated by HDBSCAN.

    Parameters
    ----------
    leaf_separation : float, optional
        How far apart to space the final leaves of the
        dendrogram. (default 1)

    log_size : boolean, optional
        Use log scale for the 'size' of clusters (i.e. number of
        points in the cluster at a given lambda value).
        (default False)
        Currently not read by the implementation.

    max_rectangle_per_icicle : int, optional
        Intended to cap the number of bars emitted per branch of the
        dendrogram, to avoid massive overplotting in cases with a lot
        of data points. (default 20)
        Currently not read by the implementation.

    Returns
    -------
    None
        NOTE(review): the implementation is incomplete. It only fills
        the local x/y coordinate mappings for each cluster and has no
        ``return`` statement. The intended result is a ``plot_data``
        dict with:

        Data associated to bars in a bar plot:
            `bar_centers` x coordinate centers for bars
            `bar_tops` heights of bars in lambda scale
            `bar_bottoms` y coordinate of bottoms of bars
            `bar_widths` widths of the bars (in x coord scale)
            `bar_bounds` a 4-tuple of [left, right, bottom, top]
                         giving the bounds on a full set of
                         cluster bars
        Data associated with cluster splits:
            `line_xs` x coordinates for horizontal dendrogram lines
            `line_ys` y coordinates for horizontal dendrogram lines
    """
    raw_tree = self._raw_tree
    leaves = _get_leaves(raw_tree)
    parents = raw_tree['parent']
    last_leaf = parents.max()
    root = parents.min()

    # We want the x and y coordinates for the start of each cluster.
    # Initialize the leaves, since we know where they go, then iterate
    # through everything from the leaves back, setting coords as we go.
    cluster_x_coords = {leaf: leaf_separation * position
                        for position, leaf in enumerate(leaves)}
    cluster_y_coords = {root: 0.0}

    # Loop-invariant pieces of the per-cluster selection: the two fields
    # we read from each split, and the mask that drops single-point
    # children so only genuine cluster children remain.
    child_lambdas = raw_tree[['child', 'lambda_val']]
    is_cluster_child = raw_tree['child_size'] > 1

    # Walk from the highest parent id down to the root so that a cluster's
    # children already have x coordinates when the cluster is visited.
    # (Presumably child cluster ids are always larger than their parent's;
    # verify against the tree construction.)
    for cluster in range(last_leaf, root - 1, -1):
        split = child_lambdas[(parents == cluster) & is_cluster_child]
        children = split['child']
        if len(children) > 1:
            # Unpacking assumes a binary split (exactly two children).
            left_child, right_child = children
            cluster_x_coords[cluster] = np.mean(
                [cluster_x_coords[left_child],
                 cluster_x_coords[right_child]])
            cluster_y_coords[left_child] = split['lambda_val'][0]
            cluster_y_coords[right_child] = split['lambda_val'][1]
279213 def _select_clusters (self ):
280214 if self .cluster_selection_method == 'eom' :
281215 stability = compute_stability (self ._raw_tree )
0 commit comments