Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions geoarrow-pyarrow/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,10 @@ description = ""
authors = [{name = "Dewey Dunnington", email = "[email protected]"}]
license = {text = "Apache-2.0"}
requires-python = ">=3.8"
dependencies = ["pyarrow >= 14.0.2", "geoarrow-types"]
dependencies = ["pyarrow >= 14.0.2", "geoarrow-types", "geoarrow-c"]

[project.optional-dependencies]
test = ["pytest", "pandas", "numpy", "geopandas", "pyogrio", "pyproj", "geoarrow-c"]
compute = ["geoarrow-c"]
test = ["pytest", "pandas", "numpy", "geopandas", "pyogrio", "pyproj"]

[project.urls]
homepage = "https://geoarrow.org"
Expand Down
4 changes: 4 additions & 0 deletions geoarrow-pyarrow/src/geoarrow/pyarrow/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,10 @@
MultiPolygonType,
wkb,
large_wkb,
wkb_view,
wkt,
large_wkt,
wkt_view,
point,
linestring,
polygon,
Expand Down Expand Up @@ -92,8 +94,10 @@
"MultiPolygonType",
"wkb",
"large_wkb",
"wkb_view",
"wkt",
"large_wkt",
"wkt_view",
"point",
"linestring",
"polygon",
Expand Down
24 changes: 24 additions & 0 deletions geoarrow-pyarrow/src/geoarrow/pyarrow/_type.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,18 @@ def large_wkb() -> WkbType:
return WkbType.__arrow_ext_deserialize__(pa.large_binary(), b"")


def wkb_view() -> WkbType:
    """Create a well-known binary type stored as Arrow binary views.

    The extension type is built over ``pa.binary_view()`` storage from an
    empty serialized-metadata payload.

    >>> import geoarrow.pyarrow as ga
    >>> ga.wkb_view()
    WkbType(geoarrow.wkb)
    >>> ga.wkb_view().storage_type
    DataType(binary_view)
    """
    storage = pa.binary_view()
    serialized = b""
    return WkbType.__arrow_ext_deserialize__(storage, serialized)


def wkt() -> WktType:
"""Well-known text with a maximum array size of 2 GB per chunk.

Expand All @@ -64,6 +76,18 @@ def large_wkt() -> WktType:
return WktType.__arrow_ext_deserialize__(pa.large_utf8(), b"")


def wkt_view() -> WktType:
    """Create a well-known text type stored as Arrow string views.

    The extension type is built over ``pa.string_view()`` storage from an
    empty serialized-metadata payload.

    >>> import geoarrow.pyarrow as ga
    >>> ga.wkt_view()
    WktType(geoarrow.wkt)
    >>> ga.wkt_view().storage_type
    DataType(string_view)
    """
    storage = pa.string_view()
    serialized = b""
    return WktType.__arrow_ext_deserialize__(storage, serialized)


def point() -> PointType:
"""Geoarrow-encoded point features.

Expand Down
20 changes: 20 additions & 0 deletions geoarrow-pyarrow/tests/test_pyarrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,10 @@ def test_type_with_crs_pyproj():
def test_constructors():
assert ga.wkb().extension_name == "geoarrow.wkb"
assert ga.large_wkb().extension_name == "geoarrow.wkb"
assert ga.wkb_view().extension_name == "geoarrow.wkb"
assert ga.wkt().extension_name == "geoarrow.wkt"
assert ga.large_wkt().extension_name == "geoarrow.wkt"
assert ga.wkt_view().extension_name == "geoarrow.wkt"
assert ga.point().extension_name == "geoarrow.point"
assert ga.linestring().extension_name == "geoarrow.linestring"
assert ga.polygon().extension_name == "geoarrow.polygon"
Expand Down Expand Up @@ -131,6 +133,24 @@ def test_array():
assert array.type.storage_type == pa.large_binary()


def test_array_view_types():
    """Check that ga.array() can build WKT/WKB arrays backed by view storage."""
    # ga.array() with view types needs two things from pyarrow:
    #   * the view type factories themselves; when the installed pyarrow does
    #     not provide pa.string_view, the attribute lookup raises
    #     AttributeError, and
    #   * the cast() to view types, which was added in pyarrow 18 and raises
    #     ArrowNotImplementedError before that.
    # Either failure means the feature is unavailable, so skip rather than
    # report an error.
    try:
        pa.array(["foofy"]).cast(pa.string_view())
    except (AttributeError, pa.lib.ArrowNotImplementedError):
        pytest.skip("ga.array() with view types requires pyarrow >= 18.0.0")

    array = ga.array(["POINT (30 10)"], ga.wkt_view())
    assert array.type == ga.wkt_view()
    assert array.type.storage_type == pa.string_view()

    # Little-endian WKB for POINT (30 10)
    wkb_item = b"\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x3e\x40\x00\x00\x00\x00\x00\x00\x24\x40"
    array = ga.array([wkb_item], ga.wkb_view())
    assert array.type == ga.wkb_view()
    assert array.type.storage_type == pa.binary_view()


def test_array_repr():
array = ga.array(["POINT (30 10)"])
array_repr = repr(array)
Expand Down
4 changes: 4 additions & 0 deletions geoarrow-types/src/geoarrow/types/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
large_wkb,
wkt,
large_wkt,
wkb_view,
wkt_view,
box,
point,
linestring,
Expand All @@ -42,6 +44,8 @@
"large_wkb",
"wkt",
"large_wkt",
"wkb_view",
"wkt_view",
"geoarrow",
"box",
"point",
Expand Down
16 changes: 14 additions & 2 deletions geoarrow-types/src/geoarrow/types/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ class Encoding(TypeSpecEnum):

>>> from geoarrow import types
>>> types.Encoding.GEOARROW
<Encoding.GEOARROW: 5>
<Encoding.GEOARROW: 7>
"""

UNSPECIFIED = 0
Expand All @@ -91,7 +91,13 @@ class Encoding(TypeSpecEnum):
LARGE_WKT = 4
"""Well-known text encoding with 64-bit offsets"""

GEOARROW = 5
WKB_VIEW = 5
"""Well-known binary encoding using binary views as a storage type"""

WKT_VIEW = 6
"""Well-known text encoding using string views as a storage type"""

GEOARROW = 7
"""GeoArrow native nested list encoding"""

def is_serialized(self):
Expand All @@ -100,6 +106,8 @@ def is_serialized(self):
Encoding.LARGE_WKB,
Encoding.WKT,
Encoding.LARGE_WKT,
Encoding.WKB_VIEW,
Encoding.WKT_VIEW,
)


Expand Down Expand Up @@ -260,10 +268,14 @@ class EdgeType(TypeSpecEnum):
(Encoding.WKB, Encoding.LARGE_WKB): Encoding.LARGE_WKB,
(Encoding.WKB, Encoding.WKT): Encoding.WKB,
(Encoding.WKB, Encoding.LARGE_WKT): Encoding.LARGE_WKB,
(Encoding.WKB, Encoding.WKB_VIEW): Encoding.WKB_VIEW,
(Encoding.WKB, Encoding.GEOARROW): Encoding.WKB,
(Encoding.WKB_VIEW, Encoding.LARGE_WKB): Encoding.WKB_VIEW,
(Encoding.WKT, Encoding.LARGE_WKT): Encoding.LARGE_WKT,
(Encoding.WKT, Encoding.LARGE_WKB): Encoding.LARGE_WKB,
(Encoding.WKT, Encoding.WKT_VIEW): Encoding.WKT_VIEW,
(Encoding.WKT, Encoding.GEOARROW): Encoding.WKB,
(Encoding.WKT_VIEW, Encoding.LARGE_WKT): Encoding.WKT_VIEW,
(GeometryType.POINT, GeometryType.MULTIPOINT): GeometryType.MULTIPOINT,
(
GeometryType.LINESTRING,
Expand Down
10 changes: 10 additions & 0 deletions geoarrow-types/src/geoarrow/types/type_pyarrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -558,6 +558,10 @@ def _parse_storage(storage_type):
return [("string", ())]
elif pa_types.is_large_string(storage_type):
return [("large_string", ())]
elif hasattr(pa_types, "is_binary_view") and pa_types.is_binary_view(storage_type):
return [("binary_view", ())]
elif hasattr(pa_types, "is_string_view") and pa_types.is_string_view(storage_type):
return [("string_view", ())]
elif pa_types.is_float64(storage_type):
return [("double", ())]
elif isinstance(storage_type, pa.ListType):
Expand Down Expand Up @@ -1014,6 +1018,10 @@ def _spec_short_repr(spec, ext_name):
Encoding.LARGE_WKB: pa.large_binary(),
}

# Register the view-backed storage types only when the installed pyarrow
# exposes the view type factories; otherwise the WKT_VIEW/WKB_VIEW encodings
# simply have no entry in _SERIALIZED_STORAGE_TYPES.
if hasattr(pa, "binary_view"):
    _SERIALIZED_STORAGE_TYPES[Encoding.WKT_VIEW] = pa.string_view()
    _SERIALIZED_STORAGE_TYPES[Encoding.WKB_VIEW] = pa.binary_view()

_NATIVE_STORAGE_TYPES = _generate_storage_types()
_add_union_types_to_native_storage_types()

Expand All @@ -1022,6 +1030,8 @@ def _spec_short_repr(spec, ext_name):
("large_binary",): Encoding.LARGE_WKB,
("string",): Encoding.WKT,
("large_string",): Encoding.LARGE_WKT,
("binary_view",): Encoding.WKB_VIEW,
("string_view",): Encoding.WKT_VIEW,
("struct",): TypeSpec(
encoding=Encoding.GEOARROW,
geometry_type=GeometryType.POINT,
Expand Down
24 changes: 23 additions & 1 deletion geoarrow-types/src/geoarrow/types/type_spec.py
Original file line number Diff line number Diff line change
Expand Up @@ -357,12 +357,22 @@ def wkb(*, edge_type=None, crs=crs.UNSPECIFIED) -> TypeSpec:
def large_wkb(*, edge_type=None, crs=crs.UNSPECIFIED) -> TypeSpec:
    """Large well-known binary encoding

    Create a :class:`TypeSpec` denoting a well-known binary type with
    64-bit data offsets. See :func:`type_spec` for parameter definitions.
    """
    return type_spec(encoding=Encoding.LARGE_WKB, edge_type=edge_type, crs=crs)


def wkb_view(*, edge_type=None, crs=crs.UNSPECIFIED) -> TypeSpec:
    """Well-known binary view encoding

    Build a :class:`TypeSpec` for a well-known binary type whose underlying
    storage type is a binary view. The ``edge_type`` and ``crs`` arguments
    are forwarded unchanged; see :func:`type_spec` for their definitions.
    """
    spec = type_spec(
        encoding=Encoding.WKB_VIEW,
        edge_type=edge_type,
        crs=crs,
    )
    return spec


def wkt(*, edge_type=None, crs=crs.UNSPECIFIED) -> TypeSpec:
"""Well-known text encoding

Expand All @@ -381,6 +391,16 @@ def large_wkt(*, edge_type=None, crs=crs.UNSPECIFIED) -> TypeSpec:
return type_spec(encoding=Encoding.LARGE_WKT, edge_type=edge_type, crs=crs)


def wkt_view(*, edge_type=None, crs=crs.UNSPECIFIED) -> TypeSpec:
    """Well-known text view encoding

    Create a :class:`TypeSpec` denoting a well-known text type using
    string views as the underlying storage type. See :func:`type_spec`
    for parameter definitions.
    """
    return type_spec(encoding=Encoding.WKT_VIEW, edge_type=edge_type, crs=crs)


def geoarrow(
*,
geometry_type=None,
Expand Down Expand Up @@ -619,6 +639,8 @@ def type_spec(
Encoding.LARGE_WKB: "geoarrow.wkb",
Encoding.WKT: "geoarrow.wkt",
Encoding.LARGE_WKT: "geoarrow.wkt",
Encoding.WKB_VIEW: "geoarrow.wkb",
Encoding.WKT_VIEW: "geoarrow.wkt",
}

_GEOARROW_EXT_NAMES = {
Expand Down
8 changes: 8 additions & 0 deletions geoarrow-types/tests/test_type_pyarrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -441,6 +441,8 @@ def test_multipolygon_array_from_geobuffers():
gt.large_wkt(),
gt.wkb(),
gt.large_wkb(),
gt.wkt_view(),
gt.wkb_view(),
# Geometry types
gt.box(),
gt.point(),
Expand Down Expand Up @@ -470,6 +472,12 @@ def test_multipolygon_array_from_geobuffers():
],
)
def test_roundtrip_extension_type(spec):
if not hasattr(pa, "binary_view") and spec.encoding in (
gt.Encoding.WKB_VIEW,
gt.Encoding.WKT_VIEW,
):
pytest.skip("binary_view/string_view requires pyarrow >= 14")

extension_type = type_pyarrow.extension_type(spec)
serialized = extension_type.__arrow_ext_serialize__()
extension_type2 = type_pyarrow._deserialize_storage(
Expand Down
Loading