Skip to content

Commit 637f347

Browse files
authored
feat(python): Constructor for GeoChunkedArray (#1383)
Closes #1382
1 parent 0f87a7e commit 637f347

File tree

5 files changed

+91
-13
lines changed

5 files changed

+91
-13
lines changed

python/geoarrow-core/python/geoarrow/rust/core/_chunked_array.pyi

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,9 @@
1-
from arro3.core.types import ArrowStreamExportable
1+
from typing import Sequence, overload
2+
from arro3.core.types import (
3+
ArrowStreamExportable,
4+
ArrowArrayExportable,
5+
ArrowSchemaExportable,
6+
)
27
from geoarrow.rust.core._scalar import GeoScalar
38

49
from ._array import GeoArray
@@ -12,6 +17,29 @@ class GeoChunkedArray:
1217
This class is used to handle chunked arrays in GeoArrow, which can be
1318
composed of multiple chunks of data.
1419
"""
20+
@overload
21+
def __init__(
22+
self, arrays: ArrowArrayExportable | ArrowStreamExportable, type: None = None
23+
) -> None: ...
24+
@overload
25+
def __init__(
26+
self,
27+
arrays: Sequence[ArrowArrayExportable],
28+
type: ArrowSchemaExportable | None = None,
29+
) -> None: ...
30+
def __init__(
31+
self,
32+
arrays: ArrowArrayExportable
33+
| ArrowStreamExportable
34+
| Sequence[ArrowArrayExportable],
35+
type: ArrowSchemaExportable | None = None,
36+
) -> None:
37+
"""Construct a new GeoChunkedArray.
38+
39+
Args:
40+
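arrays: An Arrow array or Arrow stream (any object exposing `__arrow_c_array__` or `__arrow_c_stream__`), or a sequence of Arrow arrays that all share the same data type.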
41+
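type: The GeoArrow type to assign when `arrays` is a sequence of arrays. Defaults to None, in which case the type of the first array is used.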
42+
"""
1543

1644
def __arrow_c_stream__(self, requested_schema: object | None = None) -> object:
1745
"""

python/tests/core/operations/test_geometry_col.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import pytest
44
import shapely
55
from arro3.core import Table
6-
from geoarrow.rust.core import GeoArray, geometry_col
6+
from geoarrow.rust.core import GeoArray, GeoChunkedArray, geometry_col
77

88

99
def geoarrow_array():
@@ -42,11 +42,10 @@ def test_geo_array_input():
4242
assert arr == geometry_col(arr)
4343

4444

45-
# TODO: implement once we have easy GeoChunkedArray constructor
46-
# def test_geo_chunked_array_input():
47-
# arr = geoarrow_array()
48-
# chunked = GeoChunkedArray.from_arrays([arr, arr])
49-
# assert chunked == geometry_col(chunked)
45+
def test_geo_chunked_array_input():
    """A GeoChunkedArray passed to ``geometry_col`` compares equal to the result."""
    chunk = geoarrow_array()
    # Reuse the same array for both chunks so the chunk types trivially agree.
    two_chunks = GeoChunkedArray([chunk, chunk])
    extracted = geometry_col(two_chunks)
    assert two_chunks == extracted
5049

5150

5251
def test_table_no_geom_cols():

python/tests/core/operations/test_type_id.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,6 @@ def test_points_chunked():
9191
geoms2 = shapely.points([10, 20, 30], [40, 50, 60])
9292
arr1 = GeoArray.from_arrow(gpd.GeoSeries(geoms1).to_arrow("geoarrow"))
9393
arr2 = GeoArray.from_arrow(gpd.GeoSeries(geoms2).to_arrow("geoarrow"))
94-
ca = GeoChunkedArray.from_arrow(ChunkedArray([arr1, arr2]))
94+
ca = GeoChunkedArray([arr1, arr2])
9595
out = get_type_id(ca).read_all()
9696
assert (np.asarray(out) == 1).all()

python/tests/core/test_chunked_array.py

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ def test_eq():
1414
geoms2 = shapely.points([10, 20, 30], [40, 50, 60])
1515
arr1 = GeoArray.from_arrow(gpd.GeoSeries(geoms1).to_arrow("geoarrow"))
1616
arr2 = GeoArray.from_arrow(gpd.GeoSeries(geoms2).to_arrow("geoarrow"))
17-
ca = GeoChunkedArray.from_arrow(ChunkedArray([arr1, arr2]))
17+
ca = GeoChunkedArray([arr1, arr2])
1818

1919
assert ca == ca
2020

@@ -30,7 +30,7 @@ def test_getitem():
3030
gdf = gpd.read_file(geodatasets.get_path("ny.bb"))
3131
arr1 = GeoArray.from_arrow(gdf.geometry.iloc[:2].to_arrow("geoarrow"))
3232
arr2 = GeoArray.from_arrow(gdf.geometry.iloc[2:].to_arrow("geoarrow"))
33-
ca = GeoChunkedArray.from_arrow(ChunkedArray([arr1, arr2]))
33+
ca = GeoChunkedArray([arr1, arr2])
3434

3535
for i in range(len(ca)):
3636
assert shapely.geometry.shape(ca[i]).equals(gdf.geometry.iloc[i]) # type: ignore
@@ -41,7 +41,7 @@ def test_repr():
4141
geoms2 = shapely.points([10, 20, 30], [40, 50, 60])
4242
arr1 = GeoArray.from_arrow(gpd.GeoSeries(geoms1).to_arrow("geoarrow"))
4343
arr2 = GeoArray.from_arrow(gpd.GeoSeries(geoms2).to_arrow("geoarrow"))
44-
ca = GeoChunkedArray.from_arrow(ChunkedArray([arr1, arr2]))
44+
ca = GeoChunkedArray([arr1, arr2])
4545
assert (
4646
repr(ca) == 'GeoChunkedArray(Point(dimension="XY", coord_type="interleaved"))'
4747
)
@@ -51,7 +51,7 @@ def test_downcast():
5151
coords = np.array([[1, 4], [2, 5], [3, 6]], dtype=np.float64)
5252

5353
point_arr = points(coords)
54-
point_ca = GeoChunkedArray.from_arrow(ChunkedArray([point_arr]))
54+
point_ca = GeoChunkedArray([point_arr])
5555
geometry_array = point_ca.cast(geometry())
5656
point_ca2 = geometry_array.downcast(coord_type="interleaved")
5757
assert point_ca == point_ca2
@@ -62,12 +62,22 @@ def test_downcast_with_crs():
6262

6363
crs = "EPSG:4326"
6464
point_arr = points(coords, crs=crs)
65-
point_ca = GeoChunkedArray.from_arrow(ChunkedArray([point_arr]))
65+
point_ca = GeoChunkedArray([point_arr])
6666
geometry_array = point_ca.cast(geometry(crs=crs))
6767
point_ca2 = geometry_array.downcast(coord_type="interleaved")
6868
assert point_ca == point_ca2
6969

7070

71+
def test_constructor_existing_chunked_array():
    """GeoChunkedArray can be constructed from an existing Arrow ChunkedArray.

    The function name carries the ``test_`` prefix so that pytest's default
    discovery rules collect it; without the prefix it is silently never run.
    """
    geoms1 = shapely.points([1, 2, 3], [4, 5, 6])
    geoms2 = shapely.points([10, 20, 30], [40, 50, 60])
    arr1 = GeoArray.from_arrow(gpd.GeoSeries(geoms1).to_arrow("geoarrow"))
    arr2 = GeoArray.from_arrow(gpd.GeoSeries(geoms2).to_arrow("geoarrow"))
    # Wrap the two arrays in a plain ChunkedArray first, then hand that to the
    # GeoChunkedArray constructor (the stream-input path, not the sequence path).
    ca = ChunkedArray([arr1, arr2])
    geo_ca = GeoChunkedArray(ca)
    assert geo_ca.chunk(0) == arr1
79+
80+
7181
class CustomException(Exception):
7282
pass
7383

rust/pyo3-geoarrow/src/chunked_array.rs

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ use pyo3_arrow::{PyArrayReader, PyChunkedArray};
1818

1919
use crate::data_type::PyGeoType;
2020
use crate::error::{PyGeoArrowError, PyGeoArrowResult};
21+
use crate::input::AnyGeoArray;
2122
use crate::scalar::PyGeoScalar;
2223
use crate::utils::text_repr::text_repr;
2324
use crate::{PyCoordType, PyGeoArray};
@@ -93,6 +94,46 @@ impl PyGeoChunkedArray {
9394

9495
#[pymethods]
9596
impl PyGeoChunkedArray {
97+
#[new]
98+
#[pyo3(signature = (arrays, r#type=None))]
99+
fn init(
100+
py: Python,
101+
arrays: &Bound<PyAny>,
102+
r#type: Option<PyGeoType>,
103+
) -> PyGeoArrowResult<Self> {
104+
if arrays.hasattr(intern!(py, "__arrow_c_array__"))?
105+
|| arrays.hasattr(intern!(py, "__arrow_c_stream__"))?
106+
{
107+
Ok(arrays.extract::<AnyGeoArray>()?.into_chunked_array()?)
108+
} else if let Ok(geo_arrays) = arrays.extract::<Vec<PyGeoArray>>() {
109+
let geo_arrays = geo_arrays
110+
.into_iter()
111+
.map(|arr| arr.into_inner())
112+
.collect::<Vec<_>>();
113+
114+
if !geo_arrays
115+
.windows(2)
116+
.all(|w| w[0].data_type() == w[1].data_type())
117+
{
118+
return Err(PyTypeError::new_err(
119+
"Cannot create a ChunkedArray with differing data types.",
120+
)
121+
.into());
122+
}
123+
124+
let geo_type = r#type
125+
.map(|py_data_type| py_data_type.into_inner())
126+
.unwrap_or_else(|| geo_arrays[0].data_type());
127+
128+
Ok(Self::try_new(geo_arrays, geo_type)?)
129+
} else {
130+
Err(
131+
PyTypeError::new_err("Expected ChunkedArray-like input or sequence of arrays.")
132+
.into(),
133+
)
134+
}
135+
}
136+
96137
#[pyo3(signature = (requested_schema=None))]
97138
fn __arrow_c_stream__<'py>(
98139
&self,

0 commit comments

Comments
 (0)