diff --git a/geoarrow-pandas/src/geoarrow/pandas/lib.py b/geoarrow-pandas/src/geoarrow/pandas/lib.py index dc65e96..b754388 100644 --- a/geoarrow-pandas/src/geoarrow/pandas/lib.py +++ b/geoarrow-pandas/src/geoarrow/pandas/lib.py @@ -507,18 +507,18 @@ def bounds(self): """See :func:`geoarrow.pyarrow.box`""" array_or_chunked = _ga.box(self._obj) if isinstance(array_or_chunked, _pa.ChunkedArray): - flattened = [chunk.flatten() for chunk in array_or_chunked.chunks] + flattened = [chunk.storage.flatten() for chunk in array_or_chunked.chunks] seriesish = [ _pa.chunked_array(item, _pa.float64()) for item in zip(*flattened) ] else: - seriesish = array_or_chunked.flatten() + seriesish = array_or_chunked.storage.flatten() return _pd.DataFrame( { "xmin": seriesish[0], - "xmax": seriesish[1], - "ymin": seriesish[2], + "xmax": seriesish[2], + "ymin": seriesish[1], "ymax": seriesish[3], }, index=self._obj.index, diff --git a/geoarrow-pyarrow/src/geoarrow/pyarrow/_array.py b/geoarrow-pyarrow/src/geoarrow/pyarrow/_array.py index 64cfd26..7782626 100644 --- a/geoarrow-pyarrow/src/geoarrow/pyarrow/_array.py +++ b/geoarrow-pyarrow/src/geoarrow/pyarrow/_array.py @@ -78,49 +78,18 @@ def __repr__(self): return f"{type_name}:{repr(self.type)}[{len(self)}]\n{items_str}".strip() -class PointArray(GeometryExtensionArray): - pass - - -class LinestringArray(GeometryExtensionArray): - pass - - -class PolygonArray(GeometryExtensionArray): - pass - - -class MultiPointArray(GeometryExtensionArray): - pass - - -class MultiLinestringArray(GeometryExtensionArray): - pass - - -class MultiPolygonArray(GeometryExtensionArray): - pass +class BoxArray(GeometryExtensionArray): + def __repr__(self): + type_name = type(self).__name__ + items_str = "\n".join(repr(item.bounds) for item in self) + return f"{type_name}:{repr(self.type)}[{len(self)}]\n{items_str}".strip() def array_cls_from_name(name): - if name == "geoarrow.wkb": - return GeometryExtensionArray - elif name == "geoarrow.wkt": - return GeometryExtensionArray - elif name == "geoarrow.point": - return PointArray - elif name == "geoarrow.linestring": - return LinestringArray - elif name == "geoarrow.polygon": - return PolygonArray - elif name == "geoarrow.multipoint": - return MultiPointArray - elif name == "geoarrow.multilinestring": - return MultiLinestringArray - elif name == "geoarrow.multipolygon": - return MultiPolygonArray + if name == "geoarrow.box": + return BoxArray else: - raise ValueError(f'Expected valid extension name but got "{name}"') + return GeometryExtensionArray # Inject array_cls_from_name exactly once to avoid circular import @@ -142,7 +111,7 @@ def array(obj, type_=None, *args, **kwargs) -> GeometryExtensionArray: GeometryExtensionArray:WktType(geoarrow.wkt)[1] >>> ga.as_geoarrow(["POINT (0 1)"]) - PointArray:PointType(geoarrow.point)[1] + GeometryExtensionArray:PointType(geoarrow.point)[1] """ # Convert GeoPandas to WKB diff --git a/geoarrow-pyarrow/src/geoarrow/pyarrow/_compute.py b/geoarrow-pyarrow/src/geoarrow/pyarrow/_compute.py index de7db0f..2d9a074 100644 --- a/geoarrow-pyarrow/src/geoarrow/pyarrow/_compute.py +++ b/geoarrow-pyarrow/src/geoarrow/pyarrow/_compute.py @@ -251,7 +251,7 @@ def as_geoarrow(obj, type=None, coord_type=None, promote_multi=False): >>> import geoarrow.pyarrow as ga >>> ga.as_geoarrow(["POINT (0 1)", "MULTIPOINT Z (0 1 2, 4 5 6)"]) - MultiPointArray:MultiPointType(geoarrow.multipoint_z)[2] + GeometryExtensionArray:MultiPointType(geoarrow.multipoint_z)[2] """ @@ -307,7 +307,7 @@ def make_point(x, y, z=None, m=None, crs=None): >>> import geoarrow.pyarrow as ga >>> ga.make_point([1, 2, 3], [4, 5, 6]) - PointArray:PointType(geoarrow.point)[3] + GeometryExtensionArray:PointType(geoarrow.point)[3] @@ -338,10 +338,11 @@ def make_point(x, y, z=None, m=None, crs=None): def _box_point_struct(storage): arrays = storage.flatten() - return pa.StructArray.from_arrays( - [arrays[0], arrays[0], arrays[1], arrays[1]], - names=["xmin", "xmax", "ymin", "ymax"], + box_storage = pa.StructArray.from_arrays( + [arrays[0], arrays[1], arrays[0], arrays[1]], + names=["xmin", "ymin", "xmax", "ymax"], ) + return _type.types.box().to_pyarrow().wrap_array(box_storage) def box(obj): @@ -350,7 +351,7 @@ def box(obj): >>> import geoarrow.pyarrow as ga >>> ga.box(["LINESTRING (0 10, 34 -1)"]).type - StructType(struct) + BoxType(geoarrow.box) >>> print(str(ga.box(["LINESTRING (0 10, 34 -1)"]))) -- is_valid: all not null -- child 0 type: double @@ -359,11 +360,11 @@ def box(obj): ] -- child 1 type: double [ - 34 + -1 ] -- child 2 type: double [ - -1 + 34 ] -- child 3 type: double [ @@ -399,15 +400,15 @@ def _box_agg_point_struct(arrays): out = [list(pc.min_max(array).values()) for array in arrays] out_dict = { "xmin": out[0][0].as_py(), - "xmax": out[0][1].as_py(), "ymin": out[1][0].as_py(), + "xmax": out[0][1].as_py(), "ymax": out[1][1].as_py(), } # Apparently pyarrow reorders dict keys when inferring scalar types? - return pa.scalar( - out_dict, pa.struct([(nm, pa.float64()) for nm in out_dict.keys()]) - ) + storage_type = pa.struct([(nm, pa.float64()) for nm in out_dict.keys()]) + storage_array = pa.array([out_dict], storage_type) + return _type.types.box().to_pyarrow().wrap_array(storage_array)[0] def box_agg(obj): @@ -417,7 +418,7 @@ def box_agg(obj): >>> import geoarrow.pyarrow as ga >>> ga.box_agg(["POINT (0 10)", "POINT (34 -1)"]) - + BoxScalar({'xmin': 0.0, 'ymin': -1.0, 'xmax': 34.0, 'ymax': 10.0}) """ obj = obj_as_array_or_chunked(obj) @@ -495,7 +496,7 @@ def with_coord_type(obj, coord_type): >>> import geoarrow.pyarrow as ga >>> ga.with_coord_type(["POINT (0 1)"], ga.CoordType.INTERLEAVED) - PointArray:PointType(interleaved geoarrow.point)[1] + GeometryExtensionArray:PointType(interleaved geoarrow.point)[1] """ return as_geoarrow(obj, coord_type=coord_type) @@ -537,10 +538,10 @@ def with_dimensions(obj, dimensions): >>> import geoarrow.pyarrow as ga >>> ga.with_dimensions(["POINT (0 1)"], ga.Dimensions.XYZM) - PointArray:PointType(geoarrow.point_zm)[1] + GeometryExtensionArray:PointType(geoarrow.point_zm)[1] >>> ga.with_dimensions(["POINT ZM (0 1 2 3)"], ga.Dimensions.XY) - PointArray:PointType(geoarrow.point)[1] + GeometryExtensionArray:PointType(geoarrow.point)[1] """ obj = as_geoarrow(obj) @@ -557,13 +558,13 @@ def with_geometry_type(obj, geometry_type): >>> import geoarrow.pyarrow as ga >>> ga.with_geometry_type(["POINT (0 1)"], ga.GeometryType.MULTIPOINT) - MultiPointArray:MultiPointType(geoarrow.multipoint)[1] + GeometryExtensionArray:MultiPointType(geoarrow.multipoint)[1] >>> ga.with_geometry_type(["MULTIPOINT (0 1)"], ga.GeometryType.POINT) - PointArray:PointType(geoarrow.point)[1] + GeometryExtensionArray:PointType(geoarrow.point)[1] >>> ga.with_geometry_type(["LINESTRING EMPTY", "POINT (0 1)"], ga.GeometryType.POINT) - PointArray:PointType(geoarrow.point)[2] + GeometryExtensionArray:PointType(geoarrow.point)[2] >>> ga.with_geometry_type(["MULTIPOINT (0 1, 2 3)"], ga.GeometryType.POINT) diff --git a/geoarrow-pyarrow/src/geoarrow/pyarrow/_kernel.py b/geoarrow-pyarrow/src/geoarrow/pyarrow/_kernel.py index f61cfd6..0e091ea 100644 --- a/geoarrow-pyarrow/src/geoarrow/pyarrow/_kernel.py +++ b/geoarrow-pyarrow/src/geoarrow/pyarrow/_kernel.py @@ -2,21 +2,29 @@ import pyarrow as pa import pyarrow_hotfix as _ # noqa: F401 +from geoarrow.types import box as box_spec from geoarrow.pyarrow._type import GeometryExtensionType _lazy_lib = None +_geoarrow_c_version = None def _geoarrow_c(): - global _lazy_lib + global _lazy_lib, _geoarrow_c_version if _lazy_lib is None: try: - from geoarrow.c import lib + import geoarrow.c + except ImportError as e: raise ImportError("Requested operation requires geoarrow-c") from e - _lazy_lib = lib + _lazy_lib = geoarrow.c.lib + if hasattr(geoarrow.c, "__version_tuple__"): + _geoarrow_c_version = geoarrow.c.__version_tuple__ + else: + _geoarrow_c_version = (0, 1, 0) + return _lazy_lib @@ -109,11 +117,19 @@ def unique_geometry_types_agg(type_in): @staticmethod def box(type_in): - return Kernel("box", type_in) + kernel = Kernel("box", type_in) + if _geoarrow_c_version <= (0, 1, 3): + return BoxKernelCompat(kernel) + else: + return kernel @staticmethod def box_agg(type_in): - return Kernel("box_agg", type_in) + kernel = Kernel("box_agg", type_in) + if _geoarrow_c_version <= (0, 1, 3): + return BoxKernelCompat(kernel) + else: + return kernel @staticmethod def _pack_options(options): @@ -132,3 +148,29 @@ def _pack_options(options): bytes += v.encode("UTF-8") return bytes + + +class BoxKernelCompat: + """A wrapper around the "box" kernel that works for geoarrow-c 0.1. + This is mostly to ease the transition for geoarrow-python CI while + all the packages are being updated.""" + + def __init__(self, parent: Kernel): + self.parent = parent + self.type_out = box_spec().to_pyarrow().with_crs(parent._type_in.crs) + + def push(self, arr): + parent_result = self.parent.push(arr) + return ( + None if parent_result is None else self._old_box_to_new_box(parent_result) + ) + + def finish(self): + return self._old_box_to_new_box(self.parent.finish()) + + def _old_box_to_new_box(self, array): + xmin, xmax, ymin, ymax = array.flatten() + storage = pa.StructArray.from_arrays( + [xmin, ymin, xmax, ymax], names=["xmin", "ymin", "xmax", "ymax"] + ) + return self.type_out.wrap_array(storage) diff --git a/geoarrow-pyarrow/src/geoarrow/pyarrow/_scalar.py b/geoarrow-pyarrow/src/geoarrow/pyarrow/_scalar.py index ef30631..2333e32 100644 --- a/geoarrow-pyarrow/src/geoarrow/pyarrow/_scalar.py +++ b/geoarrow-pyarrow/src/geoarrow/pyarrow/_scalar.py @@ -1,3 +1,5 @@ +from typing import Optional + import pyarrow as pa import pyarrow_hotfix as _ # noqa: F401 from geoarrow.pyarrow._kernel import Kernel @@ -12,6 +14,16 @@ def __repr__(self): if pa_version[0] < 13: return super().__repr__() + # Pretty WKT printing needs geoarrow-c + try: + from geoarrow import c # noqa: F401 + except ImportError: + return ( + super().__repr__() + + "\n" + + "* pip install geoarrow-c for prettier printing of geometry scalars" + ) + max_width = 70 try: @@ -24,7 +36,7 @@ def __repr__(self): if len(string_formatted) >= max_width: string_formatted = string_formatted[: (max_width - 3)] + "..." - return f"{type(self).__name__}\n<{string_formatted}>" + return f"{type(self).__name__}:{repr(self.type)}\n<{string_formatted}>" def _array1(self): return self.type.wrap_array(pa.array([self.value])) @@ -72,28 +84,45 @@ def wkb(self): return self.value.as_py() -class PointScalar(GeometryExtensionScalar): - pass - +class BoxScalar(GeometryExtensionScalar): + @property + def bounds(self) -> dict: + return self.as_py() -class LinestringScalar(GeometryExtensionScalar): - pass + @property + def xmin(self) -> float: + return self.bounds["xmin"] + @property + def ymin(self) -> float: + return self.bounds["ymin"] -class PolygonScalar(GeometryExtensionScalar): - pass + @property + def xmax(self) -> float: + return self.bounds["xmax"] + @property + def ymax(self) -> float: + return self.bounds["ymax"] -class MultiPointScalar(GeometryExtensionScalar): - pass + @property + def zmin(self) -> Optional[float]: + return self.bounds["zmin"] if "zmin" in self.bounds else None + @property + def zmax(self) -> Optional[float]: + return self.bounds["zmax"] if "zmax" in self.bounds else None -class MultiLinestringScalar(GeometryExtensionScalar): - pass + @property + def mmin(self) -> Optional[float]: + return self.bounds["mmin"] if "mmin" in self.bounds else None + @property + def mmax(self) -> Optional[float]: + return self.bounds["mmax"] if "mmax" in self.bounds else None -class MultiPolygonScalar(GeometryExtensionScalar): - pass + def __repr__(self) -> str: + return f"BoxScalar({self.bounds})" def scalar_cls_from_name(name): @@ -101,20 +130,10 @@ def scalar_cls_from_name(name): return WkbScalar elif name == "geoarrow.wkt": return WktScalar - elif name == "geoarrow.point": - return PointScalar - elif name == "geoarrow.linestring": - return LinestringScalar - elif name == "geoarrow.polygon": - return PolygonScalar - elif name == "geoarrow.multipoint": - return MultiPointScalar - elif name == "geoarrow.multilinestring": - return MultiLinestringScalar - elif name == "geoarrow.multipolygon": - return MultiPolygonScalar + elif name == "geoarrow.box": + return BoxScalar else: - raise ValueError(f'Expected valid extension name but got "{name}"') + return GeometryExtensionScalar # Inject array_cls_from_name exactly once to avoid circular import diff --git a/geoarrow-pyarrow/src/geoarrow/pyarrow/dataset.py b/geoarrow-pyarrow/src/geoarrow/pyarrow/dataset.py index ef64b7f..eaa7b3c 100644 --- a/geoarrow-pyarrow/src/geoarrow/pyarrow/dataset.py +++ b/geoarrow-pyarrow/src/geoarrow/pyarrow/dataset.py @@ -160,7 +160,7 @@ def index_fragments(self, num_threads=None): >>> table = pa.table([ga.array(["POINT (0.5 1.5)"])], ["geometry"]) >>> dataset = gads.dataset(table) >>> dataset.index_fragments().to_pylist() - [{'_fragment_index': 0, 'geometry': {'xmin': 0.5, 'xmax': 0.5, 'ymin': 1.5, 'ymax': 1.5}}] + [{'_fragment_index': 0, 'geometry': {'xmin': 0.5, 'ymin': 1.5, 'xmax': 0.5, 'ymax': 1.5}}] """ if self._index is None: self._index = self._build_index( @@ -213,7 +213,7 @@ def filter_fragments(self, target): if isinstance(target, str): target = [target] - target_box = box_agg(target) + target_box = box_agg(target).as_py() maybe_intersects = GeoDataset._index_box_intersects( self.index_fragments(), target_box, self.geometry_columns ) @@ -255,7 +255,7 @@ def _index_fragment(fragment, column, type): kernel = Kernel.box_agg(type) for batch in reader: kernel.push(batch.column(0)) - return kernel.finish() + return kernel.finish().storage @staticmethod def _index_fragments(fragments, columns, types, num_threads=None): @@ -295,7 +295,7 @@ def _index_fragments(fragments, columns, types, num_threads=None): @staticmethod def _index_box_intersects(index, box, columns): - xmin, xmax, ymin, ymax = box.as_py().values() + xmin, ymin, xmax, ymax = box.values() expressions = [] for col in columns: expr = ( diff --git a/geoarrow-pyarrow/src/geoarrow/pyarrow/io.py b/geoarrow-pyarrow/src/geoarrow/pyarrow/io.py index 593fdca..c4ffe92 100644 --- a/geoarrow-pyarrow/src/geoarrow/pyarrow/io.py +++ b/geoarrow-pyarrow/src/geoarrow/pyarrow/io.py @@ -508,6 +508,7 @@ def geoparquet_encoding_geoarrow(): "MultiPoint", "MultiLineString", "MultiPolygon", + "GeometryCollection", ] _GEOPARQUET_DIMENSION_LABELS = [None, "", " Z", " M", " ZM"] diff --git a/geoarrow-pyarrow/tests/test_pyarrow.py b/geoarrow-pyarrow/tests/test_pyarrow.py index 59bcd64..6396741 100644 --- a/geoarrow-pyarrow/tests/test_pyarrow.py +++ b/geoarrow-pyarrow/tests/test_pyarrow.py @@ -171,7 +171,21 @@ def test_scalar_geoarrow(): array = ga.as_geoarrow(["POINT (0 1)"]) assert array[0].wkt == "POINT (0 1)" assert array[0].wkb == ga.as_wkb(array).storage[0].as_py() - assert repr(array[0]).startswith("PointScalar") + assert repr(array[0]).startswith("GeometryExtensionScalar") + + +def test_scalar_box(): + # The box kernel doesn't yet implement non XY boxes + array = ga.box(["LINESTRING ZM (0 1 2 3, 4 5 6 7)"]) + assert array[0].xmin == 0 + assert array[0].ymin == 1 + assert array[0].zmin is None + assert array[0].mmin is None + assert array[0].xmax == 4 + assert array[0].ymax == 5 + assert array[0].zmax is None + assert array[0].mmax is None + assert repr(array[0]).startswith("BoxScalar") def test_scalar_repr(): @@ -227,7 +241,7 @@ def test_kernel_as(): out = kernel.push(array) assert out.type.extension_name == "geoarrow.point" assert out.type.crs.to_json_dict() == types.OGC_CRS84.to_json_dict() - assert isinstance(out, _array.PointArray) + assert isinstance(out, _array.GeometryExtensionArray) def test_kernel_format(): @@ -373,6 +387,24 @@ def test_multipolygon_array_from_geobuffers(): assert ga.as_wkt(arr)[0].as_py() == "MULTIPOLYGON (((1 4, 2 5, 3 6, 1 4)))" +def test_box_array_from_geobuffers(): + arr = ( + types.box() + .to_pyarrow() + .from_geobuffers( + b"\xff", + np.array([1.0, 2.0, 3.0]), + np.array([4.0, 5.0, 6.0]), + np.array([7.0, 8.0, 9.0]), + np.array([10.0, 11.0, 12.0]), + ) + ) + assert len(arr) == 3 + assert arr[2].bounds == {"xmin": 3.0, "ymin": 6.0, "xmax": 9.0, "ymax": 12.0} + assert "BoxArray" in repr(arr) + assert "'xmin': 3.0" in repr(arr) + + # Easier to test here because we have actual geoarrow arrays to parse def test_c_array_view(): arr = ga.as_geoarrow(["POLYGON ((0 0, 1 0, 0 1, 0 0))"]) diff --git a/geoarrow-types/src/geoarrow/types/crs.py b/geoarrow-types/src/geoarrow/types/crs.py index 520b091..e0e422e 100644 --- a/geoarrow-types/src/geoarrow/types/crs.py +++ b/geoarrow-types/src/geoarrow/types/crs.py @@ -133,7 +133,9 @@ def __repr__(self) -> str: class StringCrs(Crs): def __init__(self, crs: Union[str, bytes]): - if isinstance(crs, bytes): + if isinstance(crs, str): + self._crs = crs + elif isinstance(crs, bytes): self._crs = crs.decode() else: self._crs = str(crs) diff --git a/geoarrow-types/src/geoarrow/types/type_spec.py b/geoarrow-types/src/geoarrow/types/type_spec.py index 3d07ce8..725f54d 100644 --- a/geoarrow-types/src/geoarrow/types/type_spec.py +++ b/geoarrow-types/src/geoarrow/types/type_spec.py @@ -272,7 +272,7 @@ def from_extension_metadata(extension_metadata: str): if "crs_type" in metadata and metadata["crs_type"] == "projjson": out_crs = crs.ProjJsonCrs(metadata["crs"]) else: - out_crs = crs.StringCrs(metadata["crs"]) + out_crs = crs.create(metadata["crs"]) return TypeSpec(edge_type=out_edges, crs=out_crs)