diff --git a/geoarrow-pyarrow/src/geoarrow/pyarrow/_compute.py b/geoarrow-pyarrow/src/geoarrow/pyarrow/_compute.py index f5e74bb..de7db0f 100644 --- a/geoarrow-pyarrow/src/geoarrow/pyarrow/_compute.py +++ b/geoarrow-pyarrow/src/geoarrow/pyarrow/_compute.py @@ -603,25 +603,67 @@ def point_coords(obj, dimensions=None): def to_geopandas(obj): - """Convert a geoarrow-like array into a ``geopandas.GeoSeries``. + """Convert a geoarrow-like array or table into a GeoSeries/DataFrame + These are thin wrappers around ``GeoSeries.from_arrow()`` and + ``GeoDataFrame.from_arrow()`` where available, falling back on conversion + through WKB if using an older version of GeoPandas or an Arrow array type + that GeoPandas doesn't support. + + >>> import pyarrow as pa >>> import geoarrow.pyarrow as ga >>> array = ga.as_geoarrow(["POINT (0 1)"]) >>> ga.to_geopandas(array) 0 POINT (0 1) dtype: geometry + >>> table = pa.table({"geometry": array}) + >>> ga.to_geopandas(table) + geometry + 0 POINT (0 1) """ import geopandas import pandas as pd + # Heuristic to detect table-like objects + is_table_like = ( + hasattr(obj, "schema") + and not callable(obj.schema) + and isinstance(obj.schema, pa.Schema) + ) + # Attempt GeoPandas from_arrow first try: - return geopandas.GeoSeries.from_arrow(obj) + if is_table_like: + return geopandas.GeoDataFrame.from_arrow(obj) + else: + return geopandas.GeoSeries.from_arrow(obj) except ValueError: pass + except TypeError: + pass except AttributeError: pass + if is_table_like: + obj = pa.table(obj) + is_geo_column = [ + isinstance(col.type, _type.GeometryExtensionType) for col in obj.columns + ] + new_cols = [ + to_geopandas(col) if is_geo else col + for is_geo, col in zip(is_geo_column, obj.columns) + ] + + # Set the geometry column if there is exactly one geometry column + geo_column_names = [ + name for name, is_geo in zip(obj.column_names, is_geo_column) if is_geo + ] + geometry = geo_column_names[0] if len(geo_column_names) == 1 else None + return geopandas.GeoDataFrame( + {name: col for name, col in zip(obj.column_names, new_cols)}, + geometry=geometry, + ) + # Fall back on wkb conversion wkb_array_or_chunked = as_wkb(obj) diff --git a/geoarrow-pyarrow/tests/test_geopandas.py b/geoarrow-pyarrow/tests/test_geopandas.py index f167b35..66e9609 100644 --- a/geoarrow-pyarrow/tests/test_geopandas.py +++ b/geoarrow-pyarrow/tests/test_geopandas.py @@ -1,4 +1,5 @@ import pytest +import pyarrow as pa from geoarrow import types import geoarrow.pyarrow as ga @@ -23,8 +24,18 @@ def test_scalar_to_shapely(): assert array[0].to_shapely().wkt == "POINT (30 10)" -def test_to_geopandas(): - array = ga.array(["POINT (30 10)"]) +def test_to_geopandas_unsupported_type(): + # GeoPandas doesn't support geoarrow.wkt, so this goes through the branch + # that handles any GeoPandas failure + array = ga.as_wkt(["POINT (30 10)"]) + geoseries = ga.to_geopandas(array) + assert isinstance(geoseries, geopandas.GeoSeries) + assert len(geoseries) == 1 + assert geoseries.to_wkt()[0] == "POINT (30 10)" + + +def test_to_geopandas_using_geopandas(): + array = ga.as_wkb(["POINT (30 10)"]) geoseries = ga.to_geopandas(array) assert isinstance(geoseries, geopandas.GeoSeries) assert len(geoseries) == 1 @@ -32,9 +43,40 @@ def test_to_geopandas(): def test_to_geopandas_with_crs(): - array = ga.with_crs(ga.array(["POINT (30 10)"]), types.OGC_CRS84) + array = ga.with_crs(ga.as_wkt(["POINT (30 10)"]), types.OGC_CRS84) + geoseries = ga.to_geopandas(array) + assert isinstance(geoseries, geopandas.GeoSeries) + assert len(geoseries) == 1 + assert geoseries.to_wkt()[0] == "POINT (30 10)" + assert geoseries.crs.to_authority() == ("OGC", "CRS84") + + +def test_to_geopandas_with_crs_using_geopandas(): + array = ga.with_crs(ga.as_wkb(["POINT (30 10)"]), types.OGC_CRS84) geoseries = ga.to_geopandas(array) assert isinstance(geoseries, geopandas.GeoSeries) assert len(geoseries) == 1 assert geoseries.to_wkt()[0] == "POINT (30 10)" assert geoseries.crs.to_authority() == ("OGC", "CRS84") + + +def test_table_to_geopandas_unsupported_type(): + # GeoPandas doesn't support geoarrow.wkt, so this goes through the branch + # that handles any GeoPandas failure + table = pa.table({"geom": ga.as_wkt(["POINT (30 10)"])}) + gdf = ga.to_geopandas(table) + assert isinstance(gdf, geopandas.GeoDataFrame) + + geoseries = gdf.geometry + assert len(geoseries) == 1 + assert geoseries.to_wkt()[0] == "POINT (30 10)" + + +def test_table_to_geopandas_using_geopandas(): + table = pa.table({"geom": ga.as_wkb(["POINT (30 10)"])}) + gdf = ga.to_geopandas(table) + assert isinstance(gdf, geopandas.GeoDataFrame) + + geoseries = gdf.geometry + assert len(geoseries) == 1 + assert geoseries.to_wkt()[0] == "POINT (30 10)"