Skip to content

Commit 7f66997

Browse files
authored
Merge pull request #285 from ljwolf/self-neighbors
remove non-coincident points correctly from knn weights
2 parents 7bca065 + fd0929f commit 7f66997

File tree

2 files changed

+41
-20
lines changed

2 files changed

+41
-20
lines changed

libpysal/weights/distance.py

Lines changed: 40 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111
get_points_array,
1212
WSP2W,
1313
)
14-
1514
import copy
1615
from warnings import warn as Warn
1716
from scipy.spatial import distance_matrix
@@ -84,7 +83,20 @@ class KNN(W):
8483
Notes
8584
-----
8685
87-
Ties between neighbors of equal distance are arbitrarily broken.
86+
Ties between neighbors of equal distance are arbitrarily broken.
87+
88+
Further, if many points occupy the same spatial location (i.e. observations are
89+
coincident), then you may need to increase k for those observations to
90+
acquire neighbors at different spatial locations. For example, if five
91+
points are coincident, then their four nearest neighbors will all
92+
occupy the same spatial location; only the fifth nearest neighbor will
93+
result in those coincident points becoming connected to the graph as a
94+
whole.
95+
96+
Solutions to this problem include jittering the points (by adding
97+
a small random value to each observation's location) or adding
98+
higher-k neighbors only to the coincident points, using the
99+
weights.w_sets.w_union() function.
88100
89101
See Also
90102
--------
@@ -111,19 +123,30 @@ def __init__(
111123
self.data = self.kdtree.data
112124
self.k = k
113125
self.p = p
114-
this_nnq = self.kdtree.query(self.data, k=k + 1, p=p)
115126

116-
to_weight = this_nnq[1]
127+
# these are both n x k+1
128+
distances, indices = self.kdtree.query(self.data, k=k + 1, p=p)
129+
full_indices = np.arange(self.kdtree.n)
130+
131+
# if an element in the indices matrix is equal to the corresponding
132+
# index for that row, we want to mask that site from its neighbors
133+
not_self_mask = indices != full_indices.reshape(-1, 1)
134+
# if there are *too many duplicates per site*, then we may get some
135+
# rows where the site index is not in the set of k+1 neighbors
136+
# So, we need to know where these sites are
137+
has_one_too_many = not_self_mask.sum(axis=1) == (k + 1)
138+
# if a site has k+1 neighbors, drop its k+1th neighbor
139+
not_self_mask[has_one_too_many, -1] &= False
140+
not_self_indices = indices[not_self_mask].reshape(self.kdtree.n, -1)
141+
142+
to_weight = not_self_indices
117143
if ids is None:
118-
ids = list(range(to_weight.shape[0]))
119-
120-
neighbors = {}
121-
for i, row in enumerate(to_weight):
122-
row = row.tolist()
123-
row.remove(i)
124-
row = [ids[j] for j in row]
125-
focal = ids[i]
126-
neighbors[focal] = row
144+
ids = list(full_indices)
145+
named_indices = not_self_indices
146+
else:
147+
named_indices = np.asarray(ids)[not_self_indices]
148+
neighbors = {idx: list(indices) for idx, indices in zip(ids, named_indices)}
149+
127150
W.__init__(self, neighbors, id_order=ids, **kwargs)
128151

129152
@classmethod
@@ -693,6 +716,7 @@ class DistanceBand(W):
693716
threshold : float
694717
distance band
695718
p : float
719+
DEPRECATED: use `distance_metric`
696720
Minkowski p-norm distance metric parameter:
697721
1<=p<=infinity
698722
2: Euclidean distance
@@ -709,6 +733,7 @@ class DistanceBand(W):
709733
values to use for keys of the neighbors and weights dicts
710734
711735
build_sp : boolean
736+
DEPRECATED
712737
True to build sparse distance matrix and false to build dense
713738
distance matrix; significant speed gains may be obtained
714739
depending on the sparsity of the distance_matrix and
@@ -766,12 +791,6 @@ class DistanceBand(W):
766791
>>> w.weights[0]
767792
[0.01, 0.007999999999999998]
768793
769-
Notes
770-
-----
771-
772-
This was initially implemented running scipy 0.8.0dev (in epd 6.1).
773-
earlier versions of scipy (0.7.0) have a logic bug in scipy/sparse/dok.py
774-
so serge changed line 221 of that file on sal-dev to fix the logic bug.
775794
776795
"""
777796

@@ -821,6 +840,7 @@ def __init__(
821840
else:
822841
self.data = data
823842
self.kdtree = None
843+
824844
self._band()
825845
neighbors, weights = self._distance_to_W(ids)
826846
W.__init__(
@@ -862,6 +882,7 @@ def from_array(cls, array, threshold, **kwargs):
862882

863883
@classmethod
864884
def from_dataframe(cls, df, threshold, geom_col=None, ids=None, **kwargs):
885+
865886
"""
866887
Make DistanceBand weights from a dataframe.
867888

libpysal/weights/tests/test_distance.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -217,7 +217,7 @@ def test_arcdist(self):
217217
self.arc_points, distance_metric="Arc", radius=cg.sphere.RADIUS_EARTH_KM
218218
)
219219
npoints = self.arc_points.shape[0]
220-
full = np.matrix(
220+
full = np.array(
221221
[
222222
[arc(self.arc_points[i], self.arc_points[j]) for j in range(npoints)]
223223
for i in range(npoints)

0 commit comments

Comments
 (0)