Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 44 additions & 2 deletions geoarrow-pyarrow/src/geoarrow/pyarrow/_compute.py
Original file line number Diff line number Diff line change
Expand Up @@ -603,25 +603,67 @@ def point_coords(obj, dimensions=None):


def to_geopandas(obj):
"""Convert a geoarrow-like array into a ``geopandas.GeoSeries``.
"""Convert a geoarrow-like array or table into a GeoSeries/DataFrame

These are thin wrappers around ``GeoSeries.from_arrow()`` and
``GeoDataFrame.from_arrow()`` where available, falling back on conversion
through WKB if using an older version of GeoPandas or an Arrow array type
that GeoPandas doesn't support.

>>> import pyarrow as pa
>>> import geoarrow.pyarrow as ga
>>> array = ga.as_geoarrow(["POINT (0 1)"])
>>> ga.to_geopandas(array)
0 POINT (0 1)
dtype: geometry
>>> table = pa.table({"geometry": array})
>>> ga.to_geopandas(table)
geometry
0 POINT (0 1)
"""
import geopandas
import pandas as pd

# Heuristic to detect table-like objects
is_table_like = (
hasattr(obj, "schema")
and not callable(obj.schema)
and isinstance(obj.schema, pa.Schema)
)

# Attempt GeoPandas from_arrow first
try:
return geopandas.GeoSeries.from_arrow(obj)
if is_table_like:
return geopandas.GeoDataFrame.from_arrow(obj)
else:
return geopandas.GeoSeries.from_arrow(obj)
except ValueError:
pass
except TypeError:
pass
except AttributeError:
pass

if is_table_like:
obj = pa.table(obj)
is_geo_column = [
isinstance(col.type, _type.GeometryExtensionType) for col in obj.columns
]
new_cols = [
to_geopandas(col) if is_geo else col
for is_geo, col in zip(is_geo_column, obj.columns)
]

# Set the geometry column if there is exactly one geometry column
geo_column_names = [
name for name, is_geo in zip(obj.column_names, is_geo_column) if is_geo
]
geometry = geo_column_names[0] if len(geo_column_names) == 1 else None
return geopandas.GeoDataFrame(
{name: col for name, col in zip(obj.column_names, new_cols)},
geometry=geometry,
)

# Fall back on wkb conversion
wkb_array_or_chunked = as_wkb(obj)

Expand Down
48 changes: 45 additions & 3 deletions geoarrow-pyarrow/tests/test_geopandas.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import pytest
import pyarrow as pa
from geoarrow import types
import geoarrow.pyarrow as ga

Expand All @@ -23,18 +24,59 @@ def test_scalar_to_shapely():
assert array[0].to_shapely().wkt == "POINT (30 10)"


def test_to_geopandas():
array = ga.array(["POINT (30 10)"])
def test_to_geopandas_unsupported_type():
    # geoarrow.wkt is not a type GeoPandas supports, so this conversion is
    # expected to exercise the code path in to_geopandas() that takes over
    # after any GeoPandas failure.
    wkt_array = ga.as_wkt(["POINT (30 10)"])
    result = ga.to_geopandas(wkt_array)

    # The result must be a one-element GeoSeries holding the same point.
    assert isinstance(result, geopandas.GeoSeries)
    assert len(result) == 1
    assert result.to_wkt()[0] == "POINT (30 10)"


def test_to_geopandas_using_geopandas():
    # Start from a geoarrow.wkb array.
    # NOTE(review): presumably this is a type GeoPandas can import directly,
    # so the conversion should not need the fallback path -- confirm.
    wkb_array = ga.as_wkb(["POINT (30 10)"])
    result = ga.to_geopandas(wkb_array)

    # The result must be a one-element GeoSeries holding the same point.
    assert isinstance(result, geopandas.GeoSeries)
    assert len(result) == 1
    assert result.to_wkt()[0] == "POINT (30 10)"


def test_to_geopandas_with_crs():
    # Attach OGC:CRS84 to a geoarrow.wkt array and check that the CRS is
    # still present on the GeoSeries produced by to_geopandas().
    # (A previous assignment built the input from ga.array() and was
    # immediately overwritten; only the geoarrow.wkt input is kept.)
    array = ga.with_crs(ga.as_wkt(["POINT (30 10)"]), types.OGC_CRS84)
    geoseries = ga.to_geopandas(array)

    # Geometry content is unchanged by the conversion...
    assert isinstance(geoseries, geopandas.GeoSeries)
    assert len(geoseries) == 1
    assert geoseries.to_wkt()[0] == "POINT (30 10)"
    # ...and the CRS resolves to the same authority/code pair.
    assert geoseries.crs.to_authority() == ("OGC", "CRS84")


def test_to_geopandas_with_crs_using_geopandas():
    # Attach OGC:CRS84 to a geoarrow.wkb array and check that the CRS is
    # still present on the GeoSeries produced by to_geopandas().
    wkb_array = ga.as_wkb(["POINT (30 10)"])
    wkb_with_crs = ga.with_crs(wkb_array, types.OGC_CRS84)
    result = ga.to_geopandas(wkb_with_crs)

    # Geometry content is unchanged by the conversion...
    assert isinstance(result, geopandas.GeoSeries)
    assert len(result) == 1
    assert result.to_wkt()[0] == "POINT (30 10)"
    # ...and the CRS resolves to the same authority/code pair.
    assert result.crs.to_authority() == ("OGC", "CRS84")


def test_table_to_geopandas_unsupported_type():
    # geoarrow.wkt is not a type GeoPandas supports, so converting a table
    # with such a column is expected to exercise the code path in
    # to_geopandas() that takes over after any GeoPandas failure.
    wkt_column = ga.as_wkt(["POINT (30 10)"])
    source_table = pa.table({"geom": wkt_column})

    frame = ga.to_geopandas(source_table)
    assert isinstance(frame, geopandas.GeoDataFrame)

    # The frame's active geometry column must hold the original point.
    active_geometry = frame.geometry
    assert len(active_geometry) == 1
    assert active_geometry.to_wkt()[0] == "POINT (30 10)"


def test_table_to_geopandas_using_geopandas():
    # Build a one-column table whose "geom" column is a geoarrow.wkb array.
    # NOTE(review): presumably this is a type GeoPandas can import directly,
    # so the conversion should not need the fallback path -- confirm.
    wkb_column = ga.as_wkb(["POINT (30 10)"])
    source_table = pa.table({"geom": wkb_column})

    frame = ga.to_geopandas(source_table)
    assert isinstance(frame, geopandas.GeoDataFrame)

    # The frame's active geometry column must hold the original point.
    active_geometry = frame.geometry
    assert len(active_geometry) == 1
    assert active_geometry.to_wkt()[0] == "POINT (30 10)"
Loading