Skip to content

Commit edca570

Browse files
authored
refactor(geoarrow-types,geoarrow-pyarrow): Use the geoarrow-types extension definitions in geoarrow-pyarrow (#54)
1 parent 1677110 commit edca570

File tree

16 files changed: +705 additions, -909 deletions

geoarrow-pandas/src/geoarrow/pandas/lib.py

Lines changed: 25 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import pyarrow as _pa
44
import pyarrow_hotfix as _ # noqa: F401
55
import numpy as _np
6-
from geoarrow.c import lib
6+
from geoarrow.types import TypeSpec, type_spec, Encoding
77
import geoarrow.pyarrow as _ga
88

99

@@ -83,8 +83,7 @@ class GeoArrowExtensionArray(_pd.api.extensions.ExtensionArray):
8383
def __init__(self, obj, type=None):
8484
if type is not None:
8585
self._dtype = GeoArrowExtensionDtype(type)
86-
arrow_type = _ga.GeometryExtensionType._from_ctype(self._dtype._parent)
87-
self._data = _ga.array(obj, arrow_type)
86+
self._data = _ga.array(obj, self._dtype._parent)
8887
else:
8988
self._data = _ga.array(obj)
9089
self._dtype = GeoArrowExtensionDtype(self._data.type)
@@ -247,8 +246,8 @@ def to_numpy(self, dtype=None, copy=False, na_value=None):
247246

248247
return _np.array(list(self), dtype=object)
249248

250-
def __array__(self, dtype=None):
251-
return self.to_numpy(dtype=dtype)
249+
def __array__(self, dtype=None, copy=True):
250+
return self.to_numpy(dtype=dtype, copy=copy)
252251

253252

254253
@_pd.api.extensions.register_extension_dtype
@@ -271,20 +270,20 @@ class GeoArrowExtensionDtype(_pd.api.extensions.ExtensionDtype):
271270

272271
def __init__(self, parent):
273272
if isinstance(parent, _ga.GeometryExtensionType):
274-
self._parent = parent._type
275-
elif isinstance(parent, lib.CVectorType):
276273
self._parent = parent
274+
elif isinstance(parent, TypeSpec):
275+
self._parent = _ga.extension_type(parent)
277276
elif isinstance(parent, GeoArrowExtensionDtype):
278277
self._parent = parent._parent
279278
else:
280279
raise TypeError(
281-
"`geoarrow_type` must inherit from geoarrow.pyarrow.VectorType, "
282-
"geoarrow.CVectorType, or geoarrow.pandas.GeoArrowExtensionDtype"
280+
"`geoarrow_type` must be a pyarrow extension type, "
281+
"geoarrow.types.TypeSpec, or geoarrow.pandas.GeoArrowExtensionDtype"
283282
)
284283

285284
@property
286285
def pyarrow_dtype(self):
287-
return _ga.GeometryExtensionType._from_ctype(self._parent)
286+
return self._parent
288287

289288
@property
290289
def type(self):
@@ -323,9 +322,9 @@ def construct_from_string(cls, string):
323322
if params["coord_type"] == "[interleaved]":
324323
coord_type = _ga.CoordType.INTERLEAVED
325324
elif params["type"] in ("wkt", "wkb"):
326-
coord_type = _ga.CoordType.UNKNOWN
325+
coord_type = _ga.CoordType.UNSPECIFIED
327326
else:
328-
coord_type = _ga.CoordType.SEPARATE
327+
coord_type = _ga.CoordType.SEPARATED
329328

330329
if params["type"] == "point":
331330
geometry_type = _ga.GeometryType.POINT
@@ -347,7 +346,9 @@ def construct_from_string(cls, string):
347346
elif params["type"] == "wkt":
348347
base_type = _ga.wkt()
349348
else:
350-
base_type = _ga.extension_type(geometry_type, dims, coord_type)
349+
base_type = _ga.extension_type(
350+
type_spec(Encoding.GEOARROW, geometry_type, dims, coord_type)
351+
)
351352

352353
try:
353354
if params["metadata"]:
@@ -368,7 +369,7 @@ def __str__(self):
368369
ext_name = self._parent.extension_name
369370
ext_dims = self._parent.dimensions
370371
ext_coord = self._parent.coord_type
371-
ext_meta = self._parent.extension_metadata.decode("UTF-8")
372+
ext_meta = self._parent.__arrow_ext_serialize__().decode("UTF-8")
372373

373374
if ext_dims == _ga.Dimensions.XYZ:
374375
dims_str = "[z]"
@@ -440,7 +441,14 @@ def _wrap_series(self, array_or_chunked):
440441
)
441442

442443
def _obj_is_geoarrow(self):
443-
return isinstance(self._obj.dtype, GeoArrowExtensionDtype)
444+
if isinstance(self._obj.dtype, GeoArrowExtensionDtype):
445+
return True
446+
447+
if not isinstance(self._obj.dtype, _pd.ArrowDtype):
448+
return False
449+
450+
arrow_type = self._obj.dtype.pyarrow_dtype
451+
return isinstance(arrow_type, _ga.GeometryExtensionType)
444452

445453
def parse_all(self):
446454
"""See :func:`geoarrow.pyarrow.parse_all`"""
@@ -529,9 +537,9 @@ def with_edge_type(self, edge_type):
529537
"""See :func:`geoarrow.pyarrow.with_edge_type`"""
530538
return self._wrap_series(_ga.with_edge_type(self._obj, edge_type))
531539

532-
def with_crs(self, crs, crs_type=None):
540+
def with_crs(self, crs):
533541
"""See :func:`geoarrow.pyarrow.with_crs`"""
534-
return self._wrap_series(_ga.with_crs(self._obj, crs=crs, crs_type=crs_type))
542+
return self._wrap_series(_ga.with_crs(self._obj, crs=crs))
535543

536544
def with_dimensions(self, dimensions):
537545
"""See :func:`geoarrow.pyarrow.with_dimensions`"""

geoarrow-pandas/tests/test_geoarrow_pandas.py

Lines changed: 8 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
import pyarrow as pa
77
import geoarrow.pandas as gapd
88
import geoarrow.pyarrow as ga
9-
from geoarrow.c import lib
109
import numpy as np
1110

1211

@@ -18,10 +17,10 @@ def test_dtype_constructor():
1817
from_pyarrow = gapd.GeoArrowExtensionDtype(ga.point())
1918
assert from_pyarrow.name == "geoarrow.point"
2019

21-
from_ctype = gapd.GeoArrowExtensionDtype(ga.point()._type)
22-
assert from_ctype.name == "geoarrow.point"
20+
from_spec = gapd.GeoArrowExtensionDtype(ga.point().spec)
21+
assert from_spec.name == "geoarrow.point"
2322

24-
from_dtype = gapd.GeoArrowExtensionDtype(from_ctype)
23+
from_dtype = gapd.GeoArrowExtensionDtype(from_spec)
2524
assert from_dtype.name == "geoarrow.point"
2625

2726
with pytest.raises(TypeError):
@@ -34,8 +33,8 @@ def test_dtype_strings():
3433
dtype2 = gapd.GeoArrowExtensionDtype.construct_from_string(str(dtype))
3534
assert dtype2 == dtype
3635

37-
dtype = gapd.GeoArrowExtensionDtype(ga.point().with_crs("EPSG:1234"))
38-
assert str(dtype) == 'geoarrow.point{"crs":"EPSG:1234"}'
36+
dtype = gapd.GeoArrowExtensionDtype(ga.point().with_crs(ga.OGC_CRS84))
37+
assert str(dtype) == 'geoarrow.point{"crs": ' + ga.OGC_CRS84.to_json() + "}"
3938
dtype2 = gapd.GeoArrowExtensionDtype.construct_from_string(str(dtype))
4039
assert dtype2 == dtype
4140

@@ -182,23 +181,10 @@ def test_array_concat():
182181
assert len(concatenated_diff_type) == 6
183182

184183

185-
def test_pyarrow_integration():
186-
pa_array = ga.array(["POINT (0 1)", "POINT (1 2)", None])
187-
series = pa_array.to_pandas()
188-
assert series.dtype == gapd.GeoArrowExtensionDtype(ga.wkt())
189-
assert series[0] == gapd.GeoArrowExtensionScalar("POINT (0 1)")
190-
assert pa.array(series) is pa_array
191-
192-
pa_chunked_array = pa.chunked_array([pa_array])
193-
series = pa_chunked_array.to_pandas()
194-
assert series.dtype == gapd.GeoArrowExtensionDtype(ga.wkt())
195-
assert series[0] == gapd.GeoArrowExtensionScalar("POINT (0 1)")
196-
197-
198184
def test_accessor_parse_all():
199185
series = pd.Series(["POINT (0 1)"])
200186
assert series.geoarrow.parse_all() is series
201-
with pytest.raises(lib.GeoArrowCException):
187+
with pytest.raises(Exception, match="Expected geometry type at byte 0"):
202188
pd.Series(["NOT WKT"]).geoarrow.parse_all()
203189

204190

@@ -278,8 +264,8 @@ def test_accessor_with_edge_type():
278264

279265

280266
def test_accessor_with_crs():
281-
ga_series = pd.Series(["POINT (0 1)"]).geoarrow.with_crs("EPSG:1234")
282-
assert ga_series.dtype.pyarrow_dtype.crs == "EPSG:1234"
267+
ga_series = pd.Series(["POINT (0 1)"]).geoarrow.with_crs(ga.OGC_CRS84)
268+
assert ga_series.dtype.pyarrow_dtype.crs == ga.OGC_CRS84
283269

284270

285271
def test_accessor_with_dimensions():

geoarrow-pyarrow/src/geoarrow/pyarrow/__init__.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,14 @@
99

1010
from geoarrow.types._version import __version__, __version_tuple__ # NOQA: F401
1111

12-
from geoarrow.c.lib import GeometryType, Dimensions, CoordType, EdgeType, CrsType
12+
from geoarrow.types import (
13+
GeometryType,
14+
Dimensions,
15+
CoordType,
16+
EdgeType,
17+
Encoding,
18+
OGC_CRS84,
19+
)
1320

1421
from geoarrow.pyarrow._type import (
1522
GeometryExtensionType,
@@ -33,6 +40,9 @@
3340
multipolygon,
3441
extension_type,
3542
geometry_type_common,
43+
)
44+
45+
from geoarrow.types.type_pyarrow import (
3646
register_extension_types,
3747
unregister_extension_types,
3848
)
@@ -69,10 +79,11 @@
6979
"Dimensions",
7080
"CoordType",
7181
"EdgeType",
72-
"CrsType",
82+
"Encoding",
7383
"GeometryExtensionType",
7484
"WktType",
7585
"WkbType",
86+
"OGC_CRS84",
7687
"PointType",
7788
"LinestringType",
7889
"PolygonType",

geoarrow-pyarrow/src/geoarrow/pyarrow/_array.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -55,10 +55,10 @@ def __repr__(self):
5555
tail_str = [f"<{item.as_py()}>" for item in tail]
5656
for i in range(len(head)):
5757
if len(head_str[i]) > max_width:
58-
head_str[i] = f"{head_str[i][:(max_width - 4)]}...>"
58+
head_str[i] = f"{head_str[i][: (max_width - 4)]}...>"
5959
for i in range(len(tail)):
6060
if len(tail_str[i]) > max_width:
61-
tail_str[i] = f"{tail_str[i][:(max_width - 4)]}...>"
61+
tail_str[i] = f"{tail_str[i][: (max_width - 4)]}...>"
6262

6363
type_name = type(self).__name__
6464
head_str = "\n".join(head_str)
@@ -138,7 +138,7 @@ def array(obj, type_=None, *args, **kwargs) -> GeometryExtensionArray:
138138
# Convert GeoPandas to WKB
139139
if type(obj).__name__ == "GeoSeries":
140140
if obj.crs:
141-
type_ = wkb().with_crs(obj.crs.to_json(), lib.CrsType.PROJJSON)
141+
type_ = wkb().with_crs(obj.crs)
142142
else:
143143
type_ = wkb()
144144

0 commit comments

Comments
 (0)