Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 25 additions & 17 deletions geoarrow-pandas/src/geoarrow/pandas/lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import pyarrow as _pa
import pyarrow_hotfix as _ # noqa: F401
import numpy as _np
from geoarrow.c import lib
from geoarrow.types import TypeSpec, type_spec, Encoding
import geoarrow.pyarrow as _ga


Expand Down Expand Up @@ -83,8 +83,7 @@ class GeoArrowExtensionArray(_pd.api.extensions.ExtensionArray):
def __init__(self, obj, type=None):
if type is not None:
self._dtype = GeoArrowExtensionDtype(type)
arrow_type = _ga.GeometryExtensionType._from_ctype(self._dtype._parent)
self._data = _ga.array(obj, arrow_type)
self._data = _ga.array(obj, self._dtype._parent)
else:
self._data = _ga.array(obj)
self._dtype = GeoArrowExtensionDtype(self._data.type)
Expand Down Expand Up @@ -247,8 +246,8 @@ def to_numpy(self, dtype=None, copy=False, na_value=None):

return _np.array(list(self), dtype=object)

def __array__(self, dtype=None):
return self.to_numpy(dtype=dtype)
def __array__(self, dtype=None, copy=True):
return self.to_numpy(dtype=dtype, copy=copy)


@_pd.api.extensions.register_extension_dtype
Expand All @@ -271,20 +270,20 @@ class GeoArrowExtensionDtype(_pd.api.extensions.ExtensionDtype):

def __init__(self, parent):
if isinstance(parent, _ga.GeometryExtensionType):
self._parent = parent._type
elif isinstance(parent, lib.CVectorType):
self._parent = parent
elif isinstance(parent, TypeSpec):
self._parent = _ga.extension_type(parent)
elif isinstance(parent, GeoArrowExtensionDtype):
self._parent = parent._parent
else:
raise TypeError(
"`geoarrow_type` must inherit from geoarrow.pyarrow.VectorType, "
"geoarrow.CVectorType, or geoarrow.pandas.GeoArrowExtensionDtype"
"`geoarrow_type` must be a pyarrow extension type, "
"geoarrow.types.TypeSpec, or geoarrow.pandas.GeoArrowExtensionDtype"
)

@property
def pyarrow_dtype(self):
return _ga.GeometryExtensionType._from_ctype(self._parent)
return self._parent

@property
def type(self):
Expand Down Expand Up @@ -323,9 +322,9 @@ def construct_from_string(cls, string):
if params["coord_type"] == "[interleaved]":
coord_type = _ga.CoordType.INTERLEAVED
elif params["type"] in ("wkt", "wkb"):
coord_type = _ga.CoordType.UNKNOWN
coord_type = _ga.CoordType.UNSPECIFIED
else:
coord_type = _ga.CoordType.SEPARATE
coord_type = _ga.CoordType.SEPARATED

if params["type"] == "point":
geometry_type = _ga.GeometryType.POINT
Expand All @@ -347,7 +346,9 @@ def construct_from_string(cls, string):
elif params["type"] == "wkt":
base_type = _ga.wkt()
else:
base_type = _ga.extension_type(geometry_type, dims, coord_type)
base_type = _ga.extension_type(
type_spec(Encoding.GEOARROW, geometry_type, dims, coord_type)
)

try:
if params["metadata"]:
Expand All @@ -368,7 +369,7 @@ def __str__(self):
ext_name = self._parent.extension_name
ext_dims = self._parent.dimensions
ext_coord = self._parent.coord_type
ext_meta = self._parent.extension_metadata.decode("UTF-8")
ext_meta = self._parent.__arrow_ext_serialize__().decode("UTF-8")

if ext_dims == _ga.Dimensions.XYZ:
dims_str = "[z]"
Expand Down Expand Up @@ -440,7 +441,14 @@ def _wrap_series(self, array_or_chunked):
)

def _obj_is_geoarrow(self):
return isinstance(self._obj.dtype, GeoArrowExtensionDtype)
if isinstance(self._obj.dtype, GeoArrowExtensionDtype):
return True

if not isinstance(self._obj.dtype, _pd.ArrowDtype):
return False

arrow_type = self._obj.dtype.pyarrow_dtype
return isinstance(arrow_type, _ga.GeometryExtensionType)

def parse_all(self):
"""See :func:`geoarrow.pyarrow.parse_all`"""
Expand Down Expand Up @@ -529,9 +537,9 @@ def with_edge_type(self, edge_type):
"""See :func:`geoarrow.pyarrow.with_edge_type`"""
return self._wrap_series(_ga.with_edge_type(self._obj, edge_type))

def with_crs(self, crs, crs_type=None):
def with_crs(self, crs):
"""See :func:`geoarrow.pyarrow.with_crs`"""
return self._wrap_series(_ga.with_crs(self._obj, crs=crs, crs_type=crs_type))
return self._wrap_series(_ga.with_crs(self._obj, crs=crs))

def with_dimensions(self, dimensions):
"""See :func:`geoarrow.pyarrow.with_dimensions`"""
Expand Down
30 changes: 8 additions & 22 deletions geoarrow-pandas/tests/test_geoarrow_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
import pyarrow as pa
import geoarrow.pandas as gapd
import geoarrow.pyarrow as ga
from geoarrow.c import lib
import numpy as np


Expand All @@ -18,10 +17,10 @@ def test_dtype_constructor():
from_pyarrow = gapd.GeoArrowExtensionDtype(ga.point())
assert from_pyarrow.name == "geoarrow.point"

from_ctype = gapd.GeoArrowExtensionDtype(ga.point()._type)
assert from_ctype.name == "geoarrow.point"
from_spec = gapd.GeoArrowExtensionDtype(ga.point().spec)
assert from_spec.name == "geoarrow.point"

from_dtype = gapd.GeoArrowExtensionDtype(from_ctype)
from_dtype = gapd.GeoArrowExtensionDtype(from_spec)
assert from_dtype.name == "geoarrow.point"

with pytest.raises(TypeError):
Expand All @@ -34,8 +33,8 @@ def test_dtype_strings():
dtype2 = gapd.GeoArrowExtensionDtype.construct_from_string(str(dtype))
assert dtype2 == dtype

dtype = gapd.GeoArrowExtensionDtype(ga.point().with_crs("EPSG:1234"))
assert str(dtype) == 'geoarrow.point{"crs":"EPSG:1234"}'
dtype = gapd.GeoArrowExtensionDtype(ga.point().with_crs(ga.OGC_CRS84))
assert str(dtype) == 'geoarrow.point{"crs": ' + ga.OGC_CRS84.to_json() + "}"
dtype2 = gapd.GeoArrowExtensionDtype.construct_from_string(str(dtype))
assert dtype2 == dtype

Expand Down Expand Up @@ -182,23 +181,10 @@ def test_array_concat():
assert len(concatenated_diff_type) == 6


def test_pyarrow_integration():
pa_array = ga.array(["POINT (0 1)", "POINT (1 2)", None])
series = pa_array.to_pandas()
assert series.dtype == gapd.GeoArrowExtensionDtype(ga.wkt())
assert series[0] == gapd.GeoArrowExtensionScalar("POINT (0 1)")
assert pa.array(series) is pa_array

pa_chunked_array = pa.chunked_array([pa_array])
series = pa_chunked_array.to_pandas()
assert series.dtype == gapd.GeoArrowExtensionDtype(ga.wkt())
assert series[0] == gapd.GeoArrowExtensionScalar("POINT (0 1)")


def test_accessor_parse_all():
series = pd.Series(["POINT (0 1)"])
assert series.geoarrow.parse_all() is series
with pytest.raises(lib.GeoArrowCException):
with pytest.raises(Exception, match="Expected geometry type at byte 0"):
pd.Series(["NOT WKT"]).geoarrow.parse_all()


Expand Down Expand Up @@ -278,8 +264,8 @@ def test_accessor_with_edge_type():


def test_accessor_with_crs():
ga_series = pd.Series(["POINT (0 1)"]).geoarrow.with_crs("EPSG:1234")
assert ga_series.dtype.pyarrow_dtype.crs == "EPSG:1234"
ga_series = pd.Series(["POINT (0 1)"]).geoarrow.with_crs(ga.OGC_CRS84)
assert ga_series.dtype.pyarrow_dtype.crs == ga.OGC_CRS84


def test_accessor_with_dimensions():
Expand Down
15 changes: 13 additions & 2 deletions geoarrow-pyarrow/src/geoarrow/pyarrow/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,14 @@

from geoarrow.types._version import __version__, __version_tuple__ # NOQA: F401

from geoarrow.c.lib import GeometryType, Dimensions, CoordType, EdgeType, CrsType
from geoarrow.types import (
GeometryType,
Dimensions,
CoordType,
EdgeType,
Encoding,
OGC_CRS84,
)

from geoarrow.pyarrow._type import (
GeometryExtensionType,
Expand All @@ -33,6 +40,9 @@
multipolygon,
extension_type,
geometry_type_common,
)

from geoarrow.types.type_pyarrow import (
register_extension_types,
unregister_extension_types,
)
Expand Down Expand Up @@ -69,10 +79,11 @@
"Dimensions",
"CoordType",
"EdgeType",
"CrsType",
"Encoding",
"GeometryExtensionType",
"WktType",
"WkbType",
"OGC_CRS84",
"PointType",
"LinestringType",
"PolygonType",
Expand Down
6 changes: 3 additions & 3 deletions geoarrow-pyarrow/src/geoarrow/pyarrow/_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,10 +55,10 @@ def __repr__(self):
tail_str = [f"<{item.as_py()}>" for item in tail]
for i in range(len(head)):
if len(head_str[i]) > max_width:
head_str[i] = f"{head_str[i][:(max_width - 4)]}...>"
head_str[i] = f"{head_str[i][: (max_width - 4)]}...>"
for i in range(len(tail)):
if len(tail_str[i]) > max_width:
tail_str[i] = f"{tail_str[i][:(max_width - 4)]}...>"
tail_str[i] = f"{tail_str[i][: (max_width - 4)]}...>"

type_name = type(self).__name__
head_str = "\n".join(head_str)
Expand Down Expand Up @@ -138,7 +138,7 @@ def array(obj, type_=None, *args, **kwargs) -> GeometryExtensionArray:
# Convert GeoPandas to WKB
if type(obj).__name__ == "GeoSeries":
if obj.crs:
type_ = wkb().with_crs(obj.crs.to_json(), lib.CrsType.PROJJSON)
type_ = wkb().with_crs(obj.crs)
else:
type_ = wkb()

Expand Down
Loading
Loading