11
11
get_points_array ,
12
12
WSP2W ,
13
13
)
14
-
15
14
import copy
16
15
from warnings import warn as Warn
17
16
from scipy .spatial import distance_matrix
@@ -84,7 +83,20 @@ class KNN(W):
84
83
Notes
85
84
-----
86
85
87
- Ties between neighbors of equal distance are arbitrarily broken.
86
+ Ties between neighbors of equal distance are arbitrarily broken.
87
+
88
+ Further, if many points occupy the same spatial location (i.e. observations are
89
+ coincident), then you may need to increase k for those observations to
90
+ acquire neighbors at different spatial locations. For example, if five
91
+ points are coincident, then their four nearest neighbors will all
92
+ occupy the same spatial location; only the fifth nearest neighbor will
93
+ result in those coincident points becoming connected to the graph as a
94
+ whole.
95
+
96
+ Solutions to this problem include jittering the points (by adding
97
+ a small random value to each observation's location) or adding
98
+ higher-k neighbors only to the coincident points, using the
99
+ weights.w_sets.w_union() function.
88
100
89
101
See Also
90
102
--------
@@ -111,19 +123,30 @@ def __init__(
111
123
self .data = self .kdtree .data
112
124
self .k = k
113
125
self .p = p
114
- this_nnq = self .kdtree .query (self .data , k = k + 1 , p = p )
115
126
116
- to_weight = this_nnq [1 ]
127
+ # these are both n x k+1
128
+ distances , indices = self .kdtree .query (self .data , k = k + 1 , p = p )
129
+ full_indices = np .arange (self .kdtree .n )
130
+
131
+ # if an element in the indices matrix is equal to the corresponding
132
+ # index for that row, we want to mask that site from its neighbors
133
+ not_self_mask = indices != full_indices .reshape (- 1 , 1 )
134
+ # if there are *too many duplicates per site*, then we may get some
135
+ # rows where the site index is not in the set of k+1 neighbors.
136
+ # So, we need to know where these sites are
137
+ has_one_too_many = not_self_mask .sum (axis = 1 ) == (k + 1 )
138
+ # if a site has k+1 neighbors, drop its k+1th neighbor
139
+ not_self_mask [has_one_too_many , - 1 ] &= False
140
+ not_self_indices = indices [not_self_mask ].reshape (self .kdtree .n , - 1 )
141
+
142
+ to_weight = not_self_indices
117
143
if ids is None :
118
- ids = list (range (to_weight .shape [0 ]))
119
-
120
- neighbors = {}
121
- for i , row in enumerate (to_weight ):
122
- row = row .tolist ()
123
- row .remove (i )
124
- row = [ids [j ] for j in row ]
125
- focal = ids [i ]
126
- neighbors [focal ] = row
144
+ ids = list (full_indices )
145
+ named_indices = not_self_indices
146
+ else :
147
+ named_indices = np .asarray (ids )[not_self_indices ]
148
+ neighbors = {idx : list (indices ) for idx , indices in zip (ids , named_indices )}
149
+
127
150
W .__init__ (self , neighbors , id_order = ids , ** kwargs )
128
151
129
152
@classmethod
@@ -693,6 +716,7 @@ class DistanceBand(W):
693
716
threshold : float
694
717
distance band
695
718
p : float
719
+ DEPRECATED: use `distance_metric`
696
720
Minkowski p-norm distance metric parameter:
697
721
1<=p<=infinity
698
722
2: Euclidean distance
@@ -709,6 +733,7 @@ class DistanceBand(W):
709
733
values to use for keys of the neighbors and weights dicts
710
734
711
735
build_sp : boolean
736
+ DEPRECATED
712
737
True to build sparse distance matrix and false to build dense
713
738
distance matrix; significant speed gains may be obtained
714
739
depending on the sparsity of the distance_matrix and
@@ -766,12 +791,6 @@ class DistanceBand(W):
766
791
>>> w.weights[0]
767
792
[0.01, 0.007999999999999998]
768
793
769
- Notes
770
- -----
771
-
772
- This was initially implemented running scipy 0.8.0dev (in epd 6.1).
773
- earlier versions of scipy (0.7.0) have a logic bug in scipy/sparse/dok.py
774
- so serge changed line 221 of that file on sal-dev to fix the logic bug.
775
794
776
795
"""
777
796
@@ -821,6 +840,7 @@ def __init__(
821
840
else :
822
841
self .data = data
823
842
self .kdtree = None
843
+
824
844
self ._band ()
825
845
neighbors , weights = self ._distance_to_W (ids )
826
846
W .__init__ (
@@ -862,6 +882,7 @@ def from_array(cls, array, threshold, **kwargs):
862
882
863
883
@classmethod
864
884
def from_dataframe (cls , df , threshold , geom_col = None , ids = None , ** kwargs ):
885
+
865
886
"""
866
887
Make DistanceBand weights from a dataframe.
867
888
0 commit comments