Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions debacl/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

from debacl.level_set_tree import construct_tree
from debacl.level_set_tree import construct_tree_from_graph
from debacl.level_set_tree import construct_tree_from_distance_matrix
from debacl.level_set_tree import load_tree

from debacl.level_set_tree import LevelSetTree
75 changes: 73 additions & 2 deletions debacl/level_set_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,9 @@ class LevelSetTree(object):

LevelSetTree objects should not generally be instantiated directly,
because they will contain empty node hierarchies. Use the tree
constructors :func:`construct_tree` or
:func:`construct_tree_from_graph` to instantiate a LevelSetTree model.
constructors :func:`construct_tree`, :func:`construct_tree_from_graph`,
or :func: `construct_tree_from_distance_matrix` to instantiate a
LevelSetTree model.

Parameters
----------
Expand Down Expand Up @@ -1260,6 +1261,76 @@ def construct_tree(X, k, prune_threshold=None, num_levels=None, verbose=False):
return tree


def construct_tree_from_distance_matrix(X, p, k, prune_threshold=None,
num_levels=None, verbose=False):
"""
Construct a level set tree from a precomputed distance matrix.

Parameters
----------
X : 2-dimensional numpy array
A square distance matrix

p: int
Underlying dimensionality of the data that was used to create X, the
distance matrix

k : int
Number of observations to consider as neighbors to a given point.

prune_threshold : int, optional
Leaf nodes with fewer than this number of members are recursively
merged into larger nodes. If 'None' (the default), then no pruning
is performed.

num_levels : int, optional
Number of density levels in the constructed tree. If None (default),
`num_levels` is internally set to be the number of rows in `X`.

verbose : bool, optional
If True, a progress indicator is printed at every 100th level of tree
construction.

Returns
-------
T : LevelSetTree
A pruned level set tree.

See Also
--------
construct_tree, construct_tree_from_graph, LevelSetTree

Examples
--------
>>> X = numpy.random.rand(100, 2)
>>> distances = scipy.spatial.distance.pdist(X)
>>> distance_matrix = scipy.spatial.distance.squareform(distances)
>>> tree = debacl.construct_tree_from_distance_matrix(distance_matrix, p=2,
... k=8,
... prune_threshold=5)
>>> print(tree)
+----+-------------+-----------+------------+----------+------+--------+----------+
| id | start_level | end_level | start_mass | end_mass | size | parent | children |
+----+-------------+-----------+------------+----------+------+--------+----------+
| 0 | 0.000 | 0.870 | 0.000 | 0.450 | 100 | None | [3, 4] |
| 3 | 0.870 | 3.364 | 0.450 | 0.990 | 17 | 0 | [] |
| 4 | 0.870 | 1.027 | 0.450 | 0.520 | 35 | 0 | [7, 8] |
| 7 | 1.027 | 1.755 | 0.520 | 0.870 | 8 | 4 | [] |
| 8 | 1.027 | 3.392 | 0.520 | 1.000 | 23 | 4 | [] |
+----+-------------+-----------+------------+----------+------+--------+----------+
"""

sim_graph, radii = _utl.knn_graph(X, k, method='precomputed')

n, _ = X.shape
density = _utl.knn_density(radii, n, p, k)

tree = construct_tree_from_graph(adjacency_list=sim_graph, density=density,
prune_threshold=prune_threshold,
num_levels=num_levels, verbose=verbose)
return tree


def construct_tree_from_graph(adjacency_list, density, prune_threshold=None,
num_levels=None, verbose=False):
"""
Expand Down
13 changes: 13 additions & 0 deletions debacl/test/test_lst.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,19 @@ def test_construct_from_data(self):
self._check_tree_viability(tree)
self._check_tree_correctness(tree)

def test_construct_from_distance_matrix(self):
"""
Check viability and correctness of an LST constructed from a distance
matrix.
"""
distances = scipy.spatial.distance.pdist(self.dataset)
distance_matrix = scipy.spatial.distance.squareform(distances)
tree = dcl.construct_tree_from_distance_matrix(distance_matrix, self.k,
prune_threshold=self.gamma)

self._check_tree_viability(tree)
self._check_tree_correctness(tree)

def test_load(self):
"""
Check viability and correctness of an LST saved then loaded from file.
Expand Down
10 changes: 10 additions & 0 deletions debacl/test/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,16 @@ def test_knn_graph(self):
for neighbors, ans_neighbors in zip(knn, ans_graph):
self.assertItemsEqual(neighbors, ans_neighbors)

distances = scipy.spatial.distance.pdist(self.X)
distance_matrix = scipy.spatial.distance.squareform(distances)
knn, radii = utl.knn_graph(distance_matrix, k, method='precomputed')

## Test precomputed method
assert_array_equal(radii, ans_radii)

for neighbors, ans_neighbors in zip(knn, ans_graph):
self.assertItemsEqual(neighbors, ans_neighbors)

def test_epsilon_graph(self):
"""
Test construction of the epsilon-nearest neighbor graph.
Expand Down
21 changes: 18 additions & 3 deletions debacl/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,12 +38,13 @@ def knn_graph(X, k, method='brute_force', leaf_size=30):
Parameters
----------
X : numpy array | list [numpy arrays]
Data points, with each row as an observation.
Data points, with each row as an observation
OR a distance matrix if method='precomputed'

k : int
The number of points to consider as neighbors of any given observation.

method : {'brute-force', 'kd-tree', 'ball-tree'}, optional
method : {'brute-force', 'kd-tree', 'ball-tree', 'precomputed'}, optional
Computing method.

- 'brute-force': computes the (Euclidean) distance between all O(n^2)
Expand All @@ -61,11 +62,14 @@ def knn_graph(X, k, method='brute_force', leaf_size=30):
distances. Typically much faster than 'brute-force', and works with
up to a few hundred dimensions. Requires the scikit-learn library.

- 'precomputed': used when the matrix X is a precomputed distance
matrix.

leaf_size : int, optional
For the 'kd-tree' and 'ball-tree' methods, the number of observations
in the leaf nodes. Leaves are not split further, so distance
computations within leaf nodes are done by brute force. 'leaf_size' is
ignored for the 'brute-force' method.
ignored for the 'brute-force' and 'precomputed' methods.

Returns
-------
Expand Down Expand Up @@ -109,6 +113,17 @@ def knn_graph(X, k, method='brute_force', leaf_size=30):
raise ImportError("The scikit-learn library could not be loaded." +
" It is required for the 'ball-tree' method.")

if method == 'precomputed':
if X.ndim != 2 or X.shape[0] != X.shape[1]:
raise ValueError("array 'X' must be square")

# Pseudo-sort each row of the distance matrix so that the indices of
# the closest k elements are listed first # The entries [...,:k-1] are
# not in sorted order by distance, but the kth entry is guaranteed to
# be in the correct spot
neighbors = _np.argpartition(X, k - 1)[:, :k]
radii = X[_np.arange(n), neighbors[:, -1]]

else: # assume brute-force
if not _HAS_SCIPY:
raise ImportError("The 'scipy' module could not be loaded. " +
Expand Down