@@ -278,7 +278,7 @@ def density_separation(X, labels, cluster_id1, cluster_id2,
278278
279279
280280def validity_index (X , labels , metric = 'euclidean' ,
281- d = None , per_cluster_scores = False , mst_euclid_only = False , verbose = False , ** kwd_args ):
281+ d = None , per_cluster_scores = False , mst_raw_dist = False , verbose = False , ** kwd_args ):
282282 """
283283 Compute the density based cluster validity index for the
284284 clustering specified by `labels` and for each cluster in `labels`.
@@ -310,6 +310,11 @@ def validity_index(X, labels, metric='euclidean',
310310 Defaults to False with the function returning a single float
311311 value for the whole clustering.
312312
313+ mst_raw_dist : optional, boolean (default False)
314+ If True, the MSTs are constructed solely from 'raw' distances (as determined by the given metric, e.g. Euclidean distances)
315+ instead of mutual reachability distances. Setting this parameter to True therefore avoids using 'all-points-core-distances' altogether.
316+ This is particularly advantageous for elongated clusters that lie in close proximity to each other <citation needed>.
317+
313318 **kwd_args :
314319 Extra arguments to pass to the distance computation for other
315320 metrics, such as Minkowski, Mahalanobis, etc.
@@ -353,7 +358,7 @@ def validity_index(X, labels, metric='euclidean',
353358 cluster_id ,
354359 metric ,
355360 d ,
356- no_coredist = mst_euclid_only ,
361+ no_coredist = mst_raw_dist ,
357362 print_max_euclid_to_coredist_ratios = verbose ,
358363 ** kwd_args
359364 )
@@ -378,7 +383,7 @@ def validity_index(X, labels, metric='euclidean',
378383 X , labels , i , j ,
379384 internal_nodes_i , internal_nodes_j ,
380385 core_distances [i ], core_distances [j ],
381- metric = metric , no_coredist = mst_euclid_only ,
386+ metric = metric , no_coredist = mst_raw_dist ,
382387 ** kwd_args
383388 )
384389 density_sep [j , i ] = density_sep [i , j ]
0 commit comments