From 1bbf01457e427937e479156873d82f02be858691 Mon Sep 17 00:00:00 2001 From: Martin Fleischmann Date: Sat, 3 Sep 2022 12:16:24 +0200 Subject: [PATCH 01/12] make a new method as a target --- libpysal/weights/contiguity.py | 67 +++++++++++++++++++++++++++------- 1 file changed, 54 insertions(+), 13 deletions(-) diff --git a/libpysal/weights/contiguity.py b/libpysal/weights/contiguity.py index 8522f128d..11fe8a708 100644 --- a/libpysal/weights/contiguity.py +++ b/libpysal/weights/contiguity.py @@ -158,7 +158,7 @@ def from_dataframe( an ordered list of ids to use to index the spatial weights object. If used, the resulting weights object will iterate over results in the order of the names provided in this - argument. + argument. See Also -------- @@ -182,6 +182,47 @@ def from_dataframe( df[geom_col].tolist(), ids=ids, id_order=id_order, **kwargs ) + @classmethod + def from_dataframe_new(cls, df, geom_col=None, ids=None, **kwargs): + """ + Construct a weights object from a (geo)pandas (Geo)DataFrame + with a geometry column. + + Parameters + ---------- + df : DataFrame + a :class: `pandas.DataFrame` containing geometries to use + for spatial weights + geom_col : string + the name of the column in `df` that contains the + geometries. Defaults to active geometry column. + ids : str, list, np.array, pd.Series (default None) + A definition of ids to use to index the spatial weights object. + Could be the name of the dataframe column, `list`, `numpy.array`, + or `pandas.Series`. If `list`, `numpy.array` or + `pandas.Series` are used then it must have same length as dataframe. + + **kwargs : dict + Additional arguments to be passed on to the W constructor + + + See Also + -------- + :class:`libpysal.weights.weights.W` + :class:`libpysal.weights.contiguity.Rook` + """ + + if geom_col is None: + geom_col = df.geometry.name + + if isinstance(ids, str): + ids = df[ids] + + if not isinstance(ids, list): + ids = ids.tolist() + + return cls.from_iterable(df[geom_col].tolist(), ids=ids, **kwargs) + @classmethod def from_xarray( cls, @@ -227,7 +268,7 @@ def from_xarray( Returns ------- w : libpysal.weights.W/libpysal.weights.WSP - instance of spatial weights class W or WSP with an index attribute + instance of spatial weights class W or WSP with an index attribute Notes ----- @@ -382,7 +423,7 @@ def from_dataframe(cls, df, geom_col=None, **kwargs): an ordered list of ids to use to index the spatial weights object. If used, the resulting weights object will iterate over results in the order of the names provided in this - argument. + argument. See Also -------- @@ -457,7 +498,7 @@ def from_xarray( Returns ------- w : libpysal.weights.W/libpysal.weights.WSP - instance of spatial weights class W or WSP with an index attribute + instance of spatial weights class W or WSP with an index attribute Notes ----- @@ -526,17 +567,17 @@ def Voronoi(points, criterion="rook", clip="ahull", **kwargs): def _from_dataframe(df, **kwargs): """ - Construct a voronoi contiguity weight directly from a dataframe. + Construct a voronoi contiguity weight directly from a dataframe. Note that if criterion='rook', this is identical to the delaunay - graph for the points. + graph for the points. If the input dataframe is of any other geometry type than "Point", - a value error is raised. + a value error is raised. Parameters ---------- df : pandas.DataFrame - dataframe containing point geometries for a + dataframe containing point geometries for a voronoi diagram. Returns @@ -561,14 +602,14 @@ def _from_dataframe(df, **kwargs): def _build(polygons, criterion="rook", ids=None): """ - This is a developer-facing function to construct a spatial weights object. + This is a developer-facing function to construct a spatial weights object. Parameters ---------- polygons : list list of pysal polygons to use to build contiguity criterion : string - option of which kind of contiguity to build. Is either "rook" or "queen" + option of which kind of contiguity to build. Is either "rook" or "queen" ids : list list of ids to use to index the neighbor dictionary @@ -576,12 +617,12 @@ def _build(polygons, criterion="rook", ids=None): ------- tuple containing (neighbors, ids), where neighbors is a dictionary describing contiguity relations and ids is the list of ids used to index - that dictionary. + that dictionary. NOTE: this is different from the prior behavior of buildContiguity, which returned an actual weights object. Since this just dispatches for the classes above, this returns the raw ingredients for a spatial weights - object, not the object itself. + object, not the object itself. """ if ids and len(ids) != len(set(ids)): raise ValueError( @@ -621,7 +662,7 @@ def buildContiguity(polygons, criterion="rook", ids=None): This is a deprecated function. It builds a contiguity W from the polygons provided. As such, it is now - identical to calling the class constructors for Rook or Queen. + identical to calling the class constructors for Rook or Queen. """ # Warn('This function is deprecated. Please use the Rook or Queen classes', # UserWarning) From 936d5de865e7c24a5eeca39003bb6a4642b8f605 Mon Sep 17 00:00:00 2001 From: Martin Fleischmann Date: Fri, 28 Oct 2022 20:37:35 +0200 Subject: [PATCH 02/12] contiguity --- libpysal/weights/contiguity.py | 201 ++++++++++++++++++++++----------- 1 file changed, 132 insertions(+), 69 deletions(-) diff --git a/libpysal/weights/contiguity.py b/libpysal/weights/contiguity.py index 11fe8a708..f47caa6d8 100644 --- a/libpysal/weights/contiguity.py +++ b/libpysal/weights/contiguity.py @@ -1,4 +1,6 @@ import itertools +from types import NoneType +import warnings import numpy @@ -133,10 +135,17 @@ def from_iterable(cls, iterable, sparse=False, **kwargs): @classmethod def from_dataframe( - cls, df, geom_col=None, idVariable=None, ids=None, id_order=None, **kwargs + cls, + df, + geom_col=None, + idVariable=None, + ids=None, + id_order=None, + use_index=None, + **kwargs, ): """ - Construct a weights object from a pandas dataframe with a geometry + Construct a weights object from a (geo)pandas dataframe with a geometry column. This will cast the polygons to PySAL polygons, then build the W using ids from the dataframe. @@ -149,16 +158,24 @@ def from_dataframe( the name of the column in `df` that contains the geometries. Defaults to active geometry column. idVariable : string + DEPRECATED - use `ids` instead. the name of the column to use as IDs. If nothing is provided, the dataframe index is used - ids : list - a list of ids to use to index the spatial weights object. - Order is not respected from this list. + ids : list-like, string + a list-like of ids to use to index the spatial weights object or + the name of the column to use as IDs. If nothing is + provided, the dataframe index is used if `use_index=True` or + a positional index is used if `use_index=False`. + Order of the resulting W is not respected from this list. id_order : list - an ordered list of ids to use to index the spatial weights + DEPRECATED - argument is deprecated and will be removed. + An ordered list of ids to use to index the spatial weights object. If used, the resulting weights object will iterate over results in the order of the names provided in this argument. + use_index : bool + use index of `df` as `ids` to index the spatial weights object. + Defaults to False but in future will default to True. See Also -------- @@ -167,61 +184,58 @@ def from_dataframe( """ if geom_col is None: geom_col = df.geometry.name + if id_order is not None: + warnings.warn( + "`id_order` is deprecated and will be removed in future.", + FutureWarning, + stacklevel=2, + ) if id_order is True and ((idVariable is not None) or (ids is not None)): # if idVariable is None, we want ids. Otherwise, we want the # idVariable column id_order = list(df.get(idVariable, ids)) else: id_order = df.get(id_order, ids) - elif idVariable is not None: - ids = df.get(idVariable).tolist() - elif isinstance(ids, str): - ids = df.get(ids).tolist() - return cls.from_iterable( - df[geom_col].tolist(), ids=ids, id_order=id_order, **kwargs - ) - @classmethod - def from_dataframe_new(cls, df, geom_col=None, ids=None, **kwargs): - """ - Construct a weights object from a (geo)pandas (Geo)DataFrame - with a geometry column. - - Parameters - ---------- - df : DataFrame - a :class: `pandas.DataFrame` containing geometries to use - for spatial weights - geom_col : string - the name of the column in `df` that contains the - geometries. Defaults to active geometry column. - ids : str, list, np.array, pd.Series (default None) - A definition of ids to use to index the spatial weights object. - Could be the name of the dataframe column, `list`, `numpy.array`, - or `pandas.Series`. If `list`, `numpy.array` or - `pandas.Series` are used then it must have same length as dataframe. - - **kwargs : dict - Additional arguments to be passed on to the W constructor - - - See Also - -------- - :class:`libpysal.weights.weights.W` - :class:`libpysal.weights.contiguity.Rook` - """ - - if geom_col is None: - geom_col = df.geometry.name + if idVariable is not None: + if ids is None: + warnings.warn( + "`idVariable` is deprecated and will be removed in future. " + "Use `ids` instead.", + FutureWarning, + stacklevel=2, + ) + ids = idVariable + else: + warnings.warn( + "Both `idVariable` and `ids` passed, using `ids`.", + UserWarning, + stacklevel=2, + ) + + if ids is None: + if use_index is None: + warnings.warn( + "`use_index` defaults to False but will default to True in future. " + "Set True/False directly to control this behavior and silence this " + "warning", + FutureWarning, + stacklevel=2, + ) + use_index = False + if use_index: + ids = df.index.tolist() if isinstance(ids, str): ids = df[ids] - if not isinstance(ids, list): + if not isinstance(ids, (list, NoneType)): ids = ids.tolist() - return cls.from_iterable(df[geom_col].tolist(), ids=ids, **kwargs) + return cls.from_iterable( + df[geom_col].tolist(), ids=ids, id_order=id_order, **kwargs + ) @classmethod def from_xarray( @@ -399,9 +413,18 @@ def from_iterable(cls, iterable, sparse=False, **kwargs): return w @classmethod - def from_dataframe(cls, df, geom_col=None, **kwargs): + def from_dataframe( + cls, + df, + geom_col=None, + idVariable=None, + ids=None, + id_order=None, + use_index=None, + **kwargs, + ): """ - Construct a weights object from a pandas dataframe with a geometry + Construct a weights object from a (geo)pandas dataframe with a geometry column. This will cast the polygons to PySAL polygons, then build the W using ids from the dataframe. @@ -412,46 +435,86 @@ def from_dataframe(cls, df, geom_col=None, **kwargs): for spatial weights geom_col : string the name of the column in `df` that contains the - geometries. Defaults to active geometry column + geometries. Defaults to active geometry column. idVariable : string + DEPRECATED - use `ids` instead. the name of the column to use as IDs. If nothing is provided, the dataframe index is used - ids : list - a list of ids to use to index the spatial weights object. - Order is not respected from this list. + ids : list-like, string + a list-like of ids to use to index the spatial weights object or + the name of the column to use as IDs. If nothing is + provided, the dataframe index is used if `use_index=True` or + a positional index is used if `use_index=False`. + Order of the resulting W is not respected from this list. id_order : list - an ordered list of ids to use to index the spatial weights + DEPRECATED - argument is deprecated and will be removed. + An ordered list of ids to use to index the spatial weights object. If used, the resulting weights object will iterate over results in the order of the names provided in this argument. + use_index : bool + use index of `df` as `ids` to index the spatial weights object. + Defaults to False but in future will default to True. See Also -------- :class:`libpysal.weights.weights.W` - :class:`libpysal.weights.contiguity.Queen` + :class:`libpysal.weights.contiguity.Rook` """ - idVariable = kwargs.pop("idVariable", None) - ids = kwargs.pop("ids", None) - id_order = kwargs.pop("id_order", None) if geom_col is None: geom_col = df.geometry.name + if id_order is not None: + warnings.warn( + "`id_order` is deprecated and will be removed in future.", + FutureWarning, + stacklevel=2, + ) if id_order is True and ((idVariable is not None) or (ids is not None)): # if idVariable is None, we want ids. Otherwise, we want the # idVariable column - ids = list(df.get(idVariable, ids)) - id_order = ids - elif isinstance(id_order, str): - ids = df.get(id_order, ids) - id_order = ids - elif idVariable is not None: - ids = df.get(idVariable).tolist() - elif isinstance(ids, str): - ids = df.get(ids).tolist() - w = cls.from_iterable( + id_order = list(df.get(idVariable, ids)) + else: + id_order = df.get(id_order, ids) + + if idVariable is not None: + if ids is None: + warnings.warn( + "`idVariable` is deprecated and will be removed in future. " + "Use `ids` instead.", + FutureWarning, + stacklevel=2, + ) + ids = idVariable + else: + warnings.warn( + "Both `idVariable` and `ids` passed, using `ids`.", + UserWarning, + stacklevel=2, + ) + + if ids is None: + if use_index is None: + warnings.warn( + "`use_index` defaults to False but will default to True in future. " + "Set True/False directly to control this behavior and silence this " + "warning", + FutureWarning, + stacklevel=2, + ) + use_index = False + if use_index: + ids = df.index.tolist() + + if isinstance(ids, str): + ids = df[ids] + + if not isinstance(ids, (list, NoneType)): + ids = ids.tolist() + + return cls.from_iterable( df[geom_col].tolist(), ids=ids, id_order=id_order, **kwargs ) - return w @classmethod def from_xarray( From ba6eea3b170d981cde73ca16aa2cb533d56a7a54 Mon Sep 17 00:00:00 2001 From: Martin Fleischmann Date: Fri, 28 Oct 2022 20:42:03 +0200 Subject: [PATCH 03/12] rm idVariable from test --- libpysal/weights/tests/test_contiguity.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/libpysal/weights/tests/test_contiguity.py b/libpysal/weights/tests/test_contiguity.py index 06a30414d..b6d9e12db 100644 --- a/libpysal/weights/tests/test_contiguity.py +++ b/libpysal/weights/tests/test_contiguity.py @@ -21,6 +21,7 @@ try: import shapely + HAS_SHAPELY = True except ImportError: HAS_SHAPELY = False @@ -97,7 +98,6 @@ def test_from_array(self): # test named, sparse from point array pass - @ut.skipIf(PANDAS_EXTINCT, "Missing pandas") def test_from_dataframe(self): # basic @@ -110,7 +110,6 @@ def test_from_dataframe(self): w = self.cls.from_dataframe(df, geom_col="the_geom") self.assertEqual(w[self.known_wi], self.known_w) - @ut.skipIf(GEOPANDAS_EXTINCT, "Missing geopandas") def test_from_geodataframe(self): df = pdio.read_files(self.polygon_path) @@ -121,7 +120,7 @@ def test_from_geodataframe(self): self.assertEqual(w[self.known_wi], self.known_w) # named geometry + named obs - w = self.cls.from_dataframe(df, geom_col="the_geom", idVariable=self.idVariable) + w = self.cls.from_dataframe(df, geom_col="the_geom", ids=self.idVariable) self.assertEqual(w[self.known_name], self.known_namedw) def test_from_xarray(self): @@ -129,7 +128,7 @@ def test_from_xarray(self): self.assertEqual(w[self.known_wi_da], self.known_w_da) ws = self.cls.from_xarray(self.da) srowvec = ws.sparse[self.known_wspi_da].todense().tolist()[0] - this_w = {i:k for i,k in enumerate(srowvec) if k>0} + this_w = {i: k for i, k in enumerate(srowvec) if k > 0} self.assertEqual(this_w, self.known_wsp_da) @@ -151,7 +150,7 @@ def setUp(self): self.cls = c.Queen self.idVariable = "POLYID" self.known_name = 5 - self.known_namedw = {k+1:v for k,v in list(self.known_w.items())} + self.known_namedw = {k + 1: v for k, v in list(self.known_w.items())} self.known_wspi_da = 1 self.known_wsp_da = {0: 1, 2: 1, 4: 1, 5: 1, 6: 1} self.known_wi_da = (1, -30.0, -60.0) @@ -163,8 +162,8 @@ def setUp(self): (1, -30.0, 60.0): 1, (1, 30.0, -180.0): 1, (1, 30.0, -60.0): 1, - (1, 30.0, 60.0): 1 - } + (1, 30.0, 60.0): 1, + } @ut.skipIf(GEOPANDAS_EXTINCT, "Missing Geopandas") def test_linestrings(self): @@ -197,11 +196,15 @@ def setUp(self): self.cls = c.Rook self.idVariable = "POLYID" self.known_name = 5 - self.known_namedw = {k+1:v for k,v in list(self.known_w.items())} + self.known_namedw = {k + 1: v for k, v in list(self.known_w.items())} self.known_wspi_da = 1 self.known_wsp_da = {0: 1, 2: 1, 5: 1} self.known_wi_da = (1, -30.0, -180.0) - self.known_w_da = {(1, 30.0, -180.0): 1, (1, -30.0, -60.0): 1, (1, -90.0, -180.0): 1} + self.known_w_da = { + (1, 30.0, -180.0): 1, + (1, -30.0, -60.0): 1, + (1, -90.0, -180.0): 1, + } class Test_Voronoi(ut.TestCase): From 2e2ef649ac70666266003238c2322381a2efbde9 Mon Sep 17 00:00:00 2001 From: Martin Fleischmann Date: Fri, 28 Oct 2022 20:50:16 +0200 Subject: [PATCH 04/12] distance --- libpysal/weights/distance.py | 70 ++++++++++++++++++++++-------------- 1 file changed, 43 insertions(+), 27 deletions(-) diff --git a/libpysal/weights/distance.py b/libpysal/weights/distance.py index 652a5f545..aea98dcf2 100644 --- a/libpysal/weights/distance.py +++ b/libpysal/weights/distance.py @@ -83,20 +83,20 @@ class KNN(W): Notes ----- - Ties between neighbors of equal distance are arbitrarily broken. + Ties between neighbors of equal distance are arbitrarily broken. - Further, if many points occupy the same spatial location (i.e. observations are - coincident), then you may need to increase k for those observations to + Further, if many points occupy the same spatial location (i.e. observations are + coincident), then you may need to increase k for those observations to acquire neighbors at different spatial locations. For example, if five points are coincident, then their four nearest neighbors will all occupy the same spatial location; only the fifth nearest neighbor will result in those coincident points becoming connected to the graph as a - whole. + whole. Solutions to this problem include jittering the points (by adding - a small random value to each observation's location) or by adding + a small random value to each observation's location) or by adding higher-k neighbors only to the coincident points, using the - weights.w_sets.w_union() function. + weights.w_sets.w_union() function. See Also -------- @@ -271,7 +271,9 @@ def from_array(cls, array, *args, **kwargs): return cls(array, *args, **kwargs) @classmethod - def from_dataframe(cls, df, geom_col=None, ids=None, *args, **kwargs): + def from_dataframe( + cls, df, geom_col=None, ids=None, use_index=True, *args, **kwargs + ): """ Make KNN weights from a dataframe. @@ -283,10 +285,14 @@ def from_dataframe(cls, df, geom_col=None, ids=None, *args, **kwargs): geom_col : string the name of the column in `df` that contains the geometries. Defaults to active geometry column. - ids : string or iterable - if string, the column name of the indices from the dataframe - if iterable, a list of ids to use for the W - if None, df.index is used. + ids : list-like, string + a list-like of ids to use to index the spatial weights object or + the name of the column to use as IDs. If nothing is + provided, the dataframe index is used if `use_index=True` or + a positional index is used if `use_index=False`. + Order of the resulting W is not respected from this list. + use_index : bool + use index of `df` as `ids` to index the spatial weights object. See Also -------- @@ -295,7 +301,7 @@ def from_dataframe(cls, df, geom_col=None, ids=None, *args, **kwargs): if geom_col is None: geom_col = df.geometry.name pts = get_points_array(df[geom_col]) - if ids is None: + if ids is None and use_index: ids = df.index.tolist() elif isinstance(ids, str): ids = df[ids].tolist() @@ -603,7 +609,7 @@ def from_array(cls, array, **kwargs): return cls(array, **kwargs) @classmethod - def from_dataframe(cls, df, geom_col=None, ids=None, **kwargs): + def from_dataframe(cls, df, geom_col=None, ids=None, use_index=True, **kwargs): """ Make Kernel weights from a dataframe. @@ -615,10 +621,14 @@ def from_dataframe(cls, df, geom_col=None, ids=None, **kwargs): geom_col : string the name of the column in `df` that contains the geometries. Defaults to active geometry column. - ids : string or iterable - if string, the column name of the indices from the dataframe - if iterable, a list of ids to use for the W - if None, df.index is used. + ids : list-like, string + a list-like of ids to use to index the spatial weights object or + the name of the column to use as IDs. If nothing is + provided, the dataframe index is used if `use_index=True` or + a positional index is used if `use_index=False`. + Order of the resulting W is not respected from this list. + use_index : bool + use index of `df` as `ids` to index the spatial weights object. See Also -------- @@ -627,7 +637,7 @@ def from_dataframe(cls, df, geom_col=None, ids=None, **kwargs): if geom_col is None: geom_col = df.geometry.name pts = get_points_array(df[geom_col]) - if ids is None: + if ids is None and use_index: ids = df.index.tolist() elif isinstance(ids, str): ids = df[ids].tolist() @@ -691,13 +701,13 @@ def _eval_kernel(self): elif self.function == "uniform": self.kernel = [np.ones(zi.shape) * 0.5 for zi in zs] elif self.function == "quadratic": - self.kernel = [(3.0 / 4) * (1 - zi ** 2) for zi in zs] + self.kernel = [(3.0 / 4) * (1 - zi**2) for zi in zs] elif self.function == "quartic": - self.kernel = [(15.0 / 16) * (1 - zi ** 2) ** 2 for zi in zs] + self.kernel = [(15.0 / 16) * (1 - zi**2) ** 2 for zi in zs] elif self.function == "gaussian": c = np.pi * 2 c = c ** (-0.5) - self.kernel = [c * np.exp(-(zi ** 2) / 2.0) for zi in zs] + self.kernel = [c * np.exp(-(zi**2) / 2.0) for zi in zs] else: print(("Unsupported kernel function", self.function)) @@ -881,7 +891,9 @@ def from_array(cls, array, threshold, **kwargs): return cls(array, threshold, **kwargs) @classmethod - def from_dataframe(cls, df, threshold, geom_col=None, ids=None, **kwargs): + def from_dataframe( + cls, df, threshold, geom_col=None, ids=None, use_index=True, **kwargs + ): """ Make DistanceBand weights from a dataframe. @@ -894,16 +906,20 @@ def from_dataframe(cls, df, threshold, geom_col=None, ids=None, **kwargs): geom_col : string the name of the column in `df` that contains the geometries. Defaults to active geometry column. - ids : string or iterable - if string, the column name of the indices from the dataframe - if iterable, a list of ids to use for the W - if None, df.index is used. + ids : list-like, string + a list-like of ids to use to index the spatial weights object or + the name of the column to use as IDs. If nothing is + provided, the dataframe index is used if `use_index=True` or + a positional index is used if `use_index=False`. + Order of the resulting W is not respected from this list. + use_index : bool + use index of `df` as `ids` to index the spatial weights object. """ if geom_col is None: geom_col = df.geometry.name pts = get_points_array(df[geom_col]) - if ids is None: + if ids is None and use_index: ids = df.index.tolist() elif isinstance(ids, str): ids = df[ids].tolist() From 3e8e353d5fe675e29fed9fbd4a4dceadf2b8c3ff Mon Sep 17 00:00:00 2001 From: Martin Fleischmann Date: Fri, 28 Oct 2022 20:57:23 +0200 Subject: [PATCH 05/12] avoid NonType --- libpysal/weights/contiguity.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/libpysal/weights/contiguity.py b/libpysal/weights/contiguity.py index f47caa6d8..ae207d64d 100644 --- a/libpysal/weights/contiguity.py +++ b/libpysal/weights/contiguity.py @@ -1,5 +1,4 @@ import itertools -from types import NoneType import warnings import numpy @@ -230,7 +229,7 @@ def from_dataframe( if isinstance(ids, str): ids = df[ids] - if not isinstance(ids, (list, NoneType)): + if not isinstance(ids, list) and ids is not None: ids = ids.tolist() return cls.from_iterable( @@ -509,7 +508,7 @@ def from_dataframe( if isinstance(ids, str): ids = df[ids] - if not isinstance(ids, (list, NoneType)): + if not isinstance(ids, list) and ids is not None: ids = ids.tolist() return cls.from_iterable( From 47ae2047bf22083e71459ed4af58e76e9c78d074 Mon Sep 17 00:00:00 2001 From: Martin Fleischmann Date: Fri, 28 Oct 2022 21:02:37 +0200 Subject: [PATCH 06/12] check length --- libpysal/weights/contiguity.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/libpysal/weights/contiguity.py b/libpysal/weights/contiguity.py index ae207d64d..c5f824a2d 100644 --- a/libpysal/weights/contiguity.py +++ b/libpysal/weights/contiguity.py @@ -232,6 +232,9 @@ def from_dataframe( if not isinstance(ids, list) and ids is not None: ids = ids.tolist() + if len(ids) != len(df): + raise ValueError("The length of `ids` does not match the length of df.") + return cls.from_iterable( df[geom_col].tolist(), ids=ids, id_order=id_order, **kwargs ) @@ -458,7 +461,7 @@ def from_dataframe( See Also -------- :class:`libpysal.weights.weights.W` - :class:`libpysal.weights.contiguity.Rook` + :class:`libpysal.weights.contiguity.Queen` """ if geom_col is None: geom_col = df.geometry.name @@ -511,6 +514,9 @@ def from_dataframe( if not isinstance(ids, list) and ids is not None: ids = ids.tolist() + if len(ids) != len(df): + raise ValueError("The length of `ids` does not match the length of df.") + return cls.from_iterable( df[geom_col].tolist(), ids=ids, id_order=id_order, **kwargs ) From 081f96d600f6edc3f68832b454284e2d06d92e75 Mon Sep 17 00:00:00 2001 From: Martin Fleischmann Date: Fri, 28 Oct 2022 21:05:41 +0200 Subject: [PATCH 07/12] redo to minimise checks --- libpysal/weights/contiguity.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/libpysal/weights/contiguity.py b/libpysal/weights/contiguity.py index c5f824a2d..f15a60833 100644 --- a/libpysal/weights/contiguity.py +++ b/libpysal/weights/contiguity.py @@ -226,14 +226,15 @@ def from_dataframe( if use_index: ids = df.index.tolist() - if isinstance(ids, str): - ids = df[ids] + else: + if isinstance(ids, str): + ids = df[ids] - if not isinstance(ids, list) and ids is not None: - ids = ids.tolist() + if not isinstance(ids, list): + ids = ids.tolist() - if len(ids) != len(df): - raise ValueError("The length of `ids` does not match the length of df.") + if len(ids) != len(df): + raise ValueError("The length of `ids` does not match the length of df.") return cls.from_iterable( df[geom_col].tolist(), ids=ids, id_order=id_order, **kwargs @@ -508,14 +509,15 @@ def from_dataframe( if use_index: ids = df.index.tolist() - if isinstance(ids, str): - ids = df[ids] + else: + if isinstance(ids, str): + ids = df[ids] - if not isinstance(ids, list) and ids is not None: - ids = ids.tolist() + if not isinstance(ids, list): + ids = ids.tolist() - if len(ids) != len(df): - raise ValueError("The length of `ids` does not match the length of df.") + if len(ids) != len(df): + raise ValueError("The length of `ids` does not match the length of df.") return cls.from_iterable( df[geom_col].tolist(), ids=ids, id_order=id_order, **kwargs From c4f00f2ecfdc24547aa89bf448ab2f49f323ce41 Mon Sep 17 00:00:00 2001 From: Martin Fleischmann Date: Fri, 28 Oct 2022 21:49:02 +0200 Subject: [PATCH 08/12] add the same options to Delaunay --- libpysal/weights/gabriel.py | 167 ++++++++++++++++++++++-------------- 1 file changed, 101 insertions(+), 66 deletions(-) diff --git a/libpysal/weights/gabriel.py b/libpysal/weights/gabriel.py index fd90a5c79..58b8e32c4 100644 --- a/libpysal/weights/gabriel.py +++ b/libpysal/weights/gabriel.py @@ -18,10 +18,10 @@ class Delaunay(W): """ - Constructor of the Delaunay graph of a set of input points. + Constructor of the Delaunay graph of a set of input points. Relies on scipy.spatial.Delaunay and numba to quickly construct a graph from the input set of points. Will be slower without numba, - and will warn if this is missing. + and will warn if this is missing. Arguments --------- @@ -30,7 +30,7 @@ class Delaunay(W): delaunay triangulation **kwargs : keyword argument list keyword arguments passed directly to weights.W - + Notes ----- The Delaunay triangulation can result in quite a few non-local links among @@ -40,22 +40,24 @@ class Delaunay(W): The weights.Voronoi class builds a voronoi diagram among the points, clips the Voronoi cells, and then constructs an adjacency graph among the clipped cells. This graph among the clipped Voronoi cells generally represents the structure - of local adjacencies better than the "raw" Delaunay graph. + of local adjacencies better than the "raw" Delaunay graph. The weights.gabriel.Gabriel graph constructs a Delaunay graph, but only - includes the "short" links in the Delaunay graph. + includes the "short" links in the Delaunay graph. However, if the unresricted Delaunay triangulation is needed, this class will compute it much more quickly than Voronoi(coordinates, clip=None). """ + def __init__(self, coordinates, **kwargs): try: from numba import njit except ModuleNotFoundError: - warnings.warn("The numba package is used extensively in this module" - " to accelerate the computation of graphs. Without numba," - " these computations may become unduly slow on large data." - ) + warnings.warn( + "The numba package is used extensively in this module" + " to accelerate the computation of graphs. Without numba," + " these computations may become unduly slow on large data." + ) edges, _ = self._voronoi_edges(coordinates) voronoi_neighbors = pandas.DataFrame(edges).groupby(0)[1].apply(list).to_dict() W.__init__(self, voronoi_neighbors, **kwargs) @@ -72,52 +74,84 @@ def _voronoi_edges(self, coordinates): return edges, dt @classmethod - def from_dataframe(cls, df, **kwargs): + def from_dataframe(cls, df, geom_col=None, ids=None, use_index=None, **kwargs): """ - Construct a Delaunay triangulation from a geopandas GeoDataFrame. - Not that the input geometries in the dataframe must be Points. - Polygons or lines must be converted to points (e.g. using + Construct a Delaunay triangulation from a geopandas GeoDataFrame. + Not that the input geometries in the dataframe must be Points. + Polygons or lines must be converted to points (e.g. using df.geometry.centroid). Arguments --------- df : geopandas.GeoDataFrame - GeoDataFrame containing points to construct the Delaunay - Triangulation. + GeoDataFrame containing points to construct the Delaunay + Triangulation. + geom_col : string + the name of the column in `df` that contains the + geometries. Defaults to active geometry column. + ids : list-like, string + a list-like of ids to use to index the spatial weights object or + the name of the column to use as IDs. If nothing is + provided, the dataframe index is used if `use_index=True` or + a positional index is used if `use_index=False`. + Order of the resulting W is not respected from this list. + use_index : bool + use index of `df` as `ids` to index the spatial weights object. **kwargs : keyword arguments Keyword arguments that are passed downwards to the weights.W constructor. """ - geomtypes = df.geometry.type.unique() + if isinstance(df, pandas.Series): + df = df.to_frame("geometry") + if geom_col is None: + geom_col = df.geometry.name + geomtypes = df[geom_col].geom_type.unique() + + if ids is None: + if use_index is None: + warnings.warn( + "`use_index` defaults to False but will default to True in future. " + "Set True/False directly to control this behavior and silence this " + "warning", + FutureWarning, + stacklevel=2, + ) + use_index = False + if use_index: + ids = df.index.tolist() + + elif isinstance(ids, str): + ids = df[ids].tolist() + try: assert len(geomtypes) == 1 - assert geomtypes[0] == 'Point' + assert geomtypes[0] == "Point" point_array = numpy.column_stack( - (df.geometry.x.values, df.geometry.y.values) - ) - return cls(point_array, **kwargs) + (df[geom_col].x.values, df[geom_col].y.values) + ) + return cls(point_array, ids=ids, **kwargs) except AssertionError: raise TypeError( - f'The input dataframe has geometry types {geomtypes}' - f' but this delaunay triangulation is only well-defined for points.' - f' Choose a method to convert your dataframe into points (like using' - f' the df.centroid) and use that to estimate this graph.' - ) + f"The input dataframe has geometry types {geomtypes}" + f" but this delaunay triangulation is only well-defined for points." + f" Choose a method to convert your dataframe into points (like using" + f" the df.centroid) and use that to estimate this graph." + ) class Gabriel(Delaunay): """ - Constructs the Gabriel graph of a set of points. This graph is a subset of - the Delaunay triangulation where only "short" links are retained. This + Constructs the Gabriel graph of a set of points. This graph is a subset of + the Delaunay triangulation where only "short" links are retained. This function is also accelerated using numba, and implemented on top of the - scipy.spatial.Delaunay class. + scipy.spatial.Delaunay class. For a link (i,j) connecting node i to j in the Delaunay triangulation to be retained in the Gabriel graph, it must pass a point set exclusion test: 1. Construct the circle C_ij containing link (i,j) as its diameter - 2. If any other node k is contained within C_ij, then remove link (i,j) - from the graph. - 3. Once all links are evaluated, the remaining graph is the Gabriel graph. + 2. If any other node k is contained within C_ij, then remove link (i,j) + from the graph. + 3. Once all links are evaluated, the remaining graph is the Gabriel graph. Arguments --------- @@ -127,14 +161,16 @@ class Gabriel(Delaunay): **kwargs : keyword argument list keyword arguments passed directly to weights.W """ + def __init__(self, coordinates, **kwargs): try: from numba import njit except ModuleNotFoundError: - warnings.warn("The numba package is used extensively in this module" - " to accelerate the computation of graphs. Without numba," - " these computations may become unduly slow on large data." - ) + warnings.warn( + "The numba package is used extensively in this module" + " to accelerate the computation of graphs. Without numba," + " these computations may become unduly slow on large data." + ) edges, _ = self._voronoi_edges(coordinates) edges, dt = self._voronoi_edges(coordinates) droplist = _filter_gabriel( @@ -148,17 +184,17 @@ def __init__(self, coordinates, **kwargs): class Relative_Neighborhood(Delaunay): """ - Constructs the Relative Neighborhood graph from a set of points. - This graph is a subset of the Delaunay triangulation, where only + Constructs the Relative Neighborhood graph from a set of points. + This graph is a subset of the Delaunay triangulation, where only "relative neighbors" are retained. Further, it is a superset of the Minimum Spanning Tree, with additional "relative neighbors" introduced. - A relative neighbor pair of points i,j must be closer than the - maximum distance between i (or j) and each other point k. - This means that the points are at least as close to one another - as they are to any other point. - + A relative neighbor pair of points i,j must be closer than the + maximum distance between i (or j) and each other point k. + This means that the points are at least as close to one another + as they are to any other point. + Arguments --------- coordinates : array of points, (N,2) @@ -167,37 +203,37 @@ class Relative_Neighborhood(Delaunay): **kwargs : keyword argument list keyword arguments passed directly to weights.W """ + def __init__(self, coordinates, binary=True, **kwargs): try: from numba import njit except ModuleNotFoundError: - warnings.warn("The numba package is used extensively in this module" - " to accelerate the computation of graphs. Without numba," - " these computations may become unduly slow on large data." - ) + warnings.warn( + "The numba package is used extensively in this module" + " to accelerate the computation of graphs. Without numba," + " these computations may become unduly slow on large data." + ) edges, _ = self._voronoi_edges(coordinates) edges, dt = self._voronoi_edges(coordinates) - output, dkmax = _filter_relativehood( - edges, dt.points, return_dkmax=False - ) + output, dkmax = _filter_relativehood(edges, dt.points, return_dkmax=False) row, col, data = zip(*output) if binary: data = numpy.ones_like(col, dtype=float) - sp = sparse.csc_matrix((data, (row, col))) #TODO: faster way than this? - tmp = WSP(sp).to_W() + sp = sparse.csc_matrix((data, (row, col))) # TODO: faster way than this? + tmp = WSP(sp).to_W() W.__init__(self, tmp.neighbors, tmp.weights, **kwargs) - #### utilities + @njit def _edges_from_simplices(simplices): """ - Construct the sets of links that correspond to the edges of each + Construct the sets of links that correspond to the edges of each simplex. Each simplex has three "sides," and thus six undirected - edges. Thus, the input should be a list of three-length tuples, - that are then converted into the six non-directed edges for + edges. Thus, the input should be a list of three-length tuples, + that are then converted into the six non-directed edges for each simplex. """ edges = [] @@ -215,20 +251,20 @@ def _edges_from_simplices(simplices): def _filter_gabriel(edges, coordinates): """ For an input set of edges and coordinates, filter the input edges - depending on the Gabriel rule: + depending on the Gabriel rule: For each simplex, let i,j be the diameter of the circle defined by edge (i,j), and let k be the third point defining the simplex. The limiting case for the Gabriel rule is when k is also on the circle with diameter (i,j). In this limiting case, then simplex ijk must - be a right triangle, and dij**2 = djk**2 + dki**2 (by thales theorem). + be a right triangle, and dij**2 = djk**2 + dki**2 (by thales theorem). - This means that when dij**2 > djk**2 + dki**2, then k is inside the circle. - In contrast, when dij**2 < djk**2 + dji*2, k is outside of the circle. + This means that when dij**2 > djk**2 + dki**2, then k is inside the circle. + In contrast, when dij**2 < djk**2 + dji*2, k is outside of the circle. Therefore, it's sufficient to take each observation i, iterate over its - Delaunay neighbors j,k, and remove links whre dij**2 > djk**2 + dki**2 - in order to construct the Gabriel graph. + Delaunay neighbors j,k, and remove links whre dij**2 > djk**2 + dki**2 + in order to construct the Gabriel graph. """ edge_pointer = 0 n = edges.max() @@ -263,7 +299,7 @@ def _filter_gabriel(edges, coordinates): @njit def _filter_relativehood(edges, coordinates, return_dkmax=False): """ - This is a direct implementation of the algorithm from Toussaint (1980), RNG-2 + This is a direct implementation of the algorithm from Toussaint (1980), RNG-2 1. Compute the delaunay 2. for each edge of the delaunay (i,j), compute @@ -281,12 +317,12 @@ def _filter_relativehood(edges, coordinates, return_dkmax=False): pi = coordinates[i] pj = coordinates[j] dkmax = 0 - dij = ((pi - pj)**2).sum()**.5 + dij = ((pi - pj) ** 2).sum() ** 0.5 prune = False for k in range(n): pk = coordinates[k] - dik = ((pi - pk)**2).sum()**.5 - djk = ((pj - pk)**2).sum()**.5 + dik = ((pi - pk) ** 2).sum() ** 0.5 + djk = ((pj - pk) ** 2).sum() ** 0.5 distances = numpy.array([dik, djk, dkmax]) dkmax = distances.max() prune = dkmax < dij @@ -299,4 +335,3 @@ def _filter_relativehood(edges, coordinates, return_dkmax=False): r.append(dkmax) return out, r - From a61ae97ef8ba44cde4baa84e967dd282ae62a68c Mon Sep 17 00:00:00 2001 From: ljwolf Date: Thu, 3 Nov 2022 11:29:47 +0000 Subject: [PATCH 09/12] propagate relabelling for gabriel weights --- libpysal/weights/gabriel.py | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/libpysal/weights/gabriel.py b/libpysal/weights/gabriel.py index 58b8e32c4..8924a79fb 100644 --- a/libpysal/weights/gabriel.py +++ b/libpysal/weights/gabriel.py @@ -59,8 +59,15 @@ def __init__(self, coordinates, **kwargs): " these computations may become unduly slow on large data." ) edges, _ = self._voronoi_edges(coordinates) + ids = kwargs.pop("ids") + if ids is not None: + ids = numpy.asarray(ids) + edges = numpy.column_stack((ids[edges[:, 0]], ids[edges[:, 1]])) + else: + ids = numpy.arange(coordinates.shape[0]) + voronoi_neighbors = pandas.DataFrame(edges).groupby(0)[1].apply(list).to_dict() - W.__init__(self, voronoi_neighbors, **kwargs) + W.__init__(self, voronoi_neighbors, id_order=ids, **kwargs) def _voronoi_edges(self, coordinates): dt = _Delaunay(coordinates) @@ -171,15 +178,21 @@ def __init__(self, coordinates, **kwargs): " to accelerate the computation of graphs. Without numba," " these computations may become unduly slow on large data." ) - edges, _ = self._voronoi_edges(coordinates) edges, dt = self._voronoi_edges(coordinates) droplist = _filter_gabriel( edges, dt.points, ) - output = set(map(tuple, edges)).difference(set(droplist)) + output = numpy.row_stack(list(set(map(tuple, edges)).difference(set(droplist)))) + ids = kwargs.pop("ids") + if ids is not None: + ids = numpy.asarray(ids) + output = numpy.column_stack((ids[output[:, 0]], ids[output[:, 1]])) + else: + ids = numpy.arange(coordinates.shape[0]) + gabriel_neighbors = pandas.DataFrame(output).groupby(0)[1].apply(list).to_dict() - W.__init__(self, gabriel_neighbors, **kwargs) + W.__init__(self, gabriel_neighbors, id_order=ids, **kwargs) class Relative_Neighborhood(Delaunay): @@ -213,15 +226,18 @@ def __init__(self, coordinates, binary=True, **kwargs): " to accelerate the computation of graphs. Without numba," " these computations may become unduly slow on large data." ) - edges, _ = self._voronoi_edges(coordinates) edges, dt = self._voronoi_edges(coordinates) output, dkmax = _filter_relativehood(edges, dt.points, return_dkmax=False) row, col, data = zip(*output) if binary: data = numpy.ones_like(col, dtype=float) sp = sparse.csc_matrix((data, (row, col))) # TODO: faster way than this? - tmp = WSP(sp).to_W() - W.__init__(self, tmp.neighbors, tmp.weights, **kwargs) + ids = kwargs.pop("ids") + if ids is None: + ids = numpy.arange(sp.shape[0]) + ids = list(ids) + tmp = WSP(sp, id_order=ids).to_W() + W.__init__(self, tmp.neighbors, tmp.weights, id_order=ids, **kwargs) #### utilities From 5338bec1eb798ec107723a3f2670133ba9b2ed3c Mon Sep 17 00:00:00 2001 From: ljwolf Date: Thu, 3 Nov 2022 12:27:02 +0000 Subject: [PATCH 10/12] get, do not pop, from kwargs --- libpysal/weights/gabriel.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/libpysal/weights/gabriel.py b/libpysal/weights/gabriel.py index 8924a79fb..f7932d488 100644 --- a/libpysal/weights/gabriel.py +++ b/libpysal/weights/gabriel.py @@ -59,15 +59,16 @@ def __init__(self, coordinates, **kwargs): " these computations may become unduly slow on large data." ) edges, _ = self._voronoi_edges(coordinates) - ids = kwargs.pop("ids") + ids = kwargs.get("ids") if ids is not None: ids = numpy.asarray(ids) edges = numpy.column_stack((ids[edges[:, 0]], ids[edges[:, 1]])) + del kwargs["ids"] else: ids = numpy.arange(coordinates.shape[0]) voronoi_neighbors = pandas.DataFrame(edges).groupby(0)[1].apply(list).to_dict() - W.__init__(self, voronoi_neighbors, id_order=ids, **kwargs) + W.__init__(self, voronoi_neighbors, id_order=list(ids), **kwargs) def _voronoi_edges(self, coordinates): dt = _Delaunay(coordinates) @@ -184,15 +185,16 @@ def __init__(self, coordinates, **kwargs): dt.points, ) output = numpy.row_stack(list(set(map(tuple, edges)).difference(set(droplist)))) - ids = kwargs.pop("ids") + ids = kwargs.get("ids") if ids is not None: ids = numpy.asarray(ids) output = numpy.column_stack((ids[output[:, 0]], ids[output[:, 1]])) + del kwargs["ids"] else: ids = numpy.arange(coordinates.shape[0]) gabriel_neighbors = pandas.DataFrame(output).groupby(0)[1].apply(list).to_dict() - W.__init__(self, gabriel_neighbors, id_order=ids, **kwargs) + W.__init__(self, gabriel_neighbors, id_order=list(ids), **kwargs) class Relative_Neighborhood(Delaunay): @@ -232,9 +234,11 @@ def __init__(self, coordinates, binary=True, **kwargs): if binary: data = numpy.ones_like(col, dtype=float) sp = sparse.csc_matrix((data, (row, col))) # TODO: faster way than this? - ids = kwargs.pop("ids") + ids = kwargs.get("ids") if ids is None: ids = numpy.arange(sp.shape[0]) + else: + del kwargs["ids"] ids = list(ids) tmp = WSP(sp, id_order=ids).to_W() W.__init__(self, tmp.neighbors, tmp.weights, id_order=ids, **kwargs) From f5271260030b07cc7c981ded1704dee19de0a899 Mon Sep 17 00:00:00 2001 From: Martin Fleischmann Date: Thu, 3 Nov 2022 14:53:42 +0100 Subject: [PATCH 11/12] preserve order in contiguity --- libpysal/weights/contiguity.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/libpysal/weights/contiguity.py b/libpysal/weights/contiguity.py index f15a60833..9783d79f3 100644 --- a/libpysal/weights/contiguity.py +++ b/libpysal/weights/contiguity.py @@ -236,6 +236,9 @@ def from_dataframe( if len(ids) != len(df): raise ValueError("The length of `ids` does not match the length of df.") + if id_order is None: + id_order = ids + return cls.from_iterable( df[geom_col].tolist(), ids=ids, id_order=id_order, **kwargs ) @@ -519,6 +522,9 @@ def from_dataframe( if len(ids) != len(df): raise ValueError("The length of `ids` does not match the length of df.") + if id_order is None: + id_order = ids + return cls.from_iterable( df[geom_col].tolist(), ids=ids, id_order=id_order, **kwargs ) @@ -639,7 +645,7 @@ def _from_dataframe(df, **kwargs): """ Construct a voronoi contiguity weight directly from a dataframe. Note that if criterion='rook', this is identical to the delaunay - graph for the points. + graph for the points if no clipping of the voronoi cells is applied. If the input dataframe is of any other geometry type than "Point", a value error is raised. From c7966730d4a41fdd67f7204fe0b2f2a3acd63ff6 Mon Sep 17 00:00:00 2001 From: Martin Fleischmann Date: Sun, 1 Jan 2023 22:31:02 +0100 Subject: [PATCH 12/12] test order --- libpysal/weights/tests/test_contiguity.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/libpysal/weights/tests/test_contiguity.py b/libpysal/weights/tests/test_contiguity.py index b6d9e12db..ea6b3877f 100644 --- a/libpysal/weights/tests/test_contiguity.py +++ b/libpysal/weights/tests/test_contiguity.py @@ -123,6 +123,16 @@ def test_from_geodataframe(self): w = self.cls.from_dataframe(df, geom_col="the_geom", ids=self.idVariable) self.assertEqual(w[self.known_name], self.known_namedw) + @ut.skipIf(GEOPANDAS_EXTINCT, "Missing geopandas") + def test_from_geodataframe_order(self): + import geopandas + + south = geopandas.read_file(pysal_examples.get_path("south.shp")) + expected = south.FIPS.iloc[:5].tolist() + for ids_ in ("FIPS", south.FIPS): + w = self.cls.from_dataframe(south, ids=ids_) + self.assertEqual(w.id_order[:5], expected) + def test_from_xarray(self): w = self.cls.from_xarray(self.da, sparse=False, n_jobs=-1) self.assertEqual(w[self.known_wi_da], self.known_w_da)