Skip to content

Commit 4f8f146

Browse files
committed
feat: search to arrow
1 parent cb694c8 commit 4f8f146

File tree

4 files changed

+70
-13
lines changed

4 files changed

+70
-13
lines changed

Cargo.lock

Lines changed: 9 additions & 11 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,9 @@ stac-api = { features = [
2828
"client",
2929
"python",
3030
], git = "https://github.com/stac-utils/stac-rs" }
31-
stac-cli = { git = "https://github.com/stac-utils/stac-rs", default-features = false, features = [
31+
stac-cli = { git = "https://github.com/stac-utils/stac-rs", features = [
3232
"pgstac",
33+
"duckdb",
3334
] }
3435
stac-duckdb = { git = "https://github.com/stac-utils/stac-rs" }
3536
thiserror = "2.0.11"

src/duckdb.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ impl DuckdbClient {
115115
result?
116116
};
117117
if record_batches.is_empty() {
118-
todo!()
118+
Ok(py.None())
119119
} else {
120120
let schema = record_batches[0].schema();
121121
let table = PyTable::try_new(record_batches, schema)?;

stacrs.pyi

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
from typing import Any, Optional, Tuple
22

3+
import arro3.core
4+
35
class DuckdbClient:
46
"""A client for querying stac-geoparquet with DuckDB."""
57

@@ -64,6 +66,62 @@ class DuckdbClient:
6466
dict[str, Any]: A feature collection of STAC items.
6567
"""
6668

69+
def search_to_arrow(
70+
self,
71+
href: str,
72+
*,
73+
ids: Optional[str | list[str]] = None,
74+
collections: Optional[str | list[str]] = None,
75+
intersects: Optional[str | dict[str, Any]] = None,
76+
limit: Optional[int] = None,
77+
offset: Optional[int] = None,
78+
bbox: Optional[list[float]] = None,
79+
datetime: Optional[str] = None,
80+
include: Optional[str | list[str]] = None,
81+
exclude: Optional[str | list[str]] = None,
82+
sortby: Optional[str | list[str]] = None,
83+
filter: Optional[str | dict[str, Any]] = None,
84+
query: Optional[dict[str, Any]] = None,
85+
**kwargs: str,
86+
) -> arro3.core.Table | None:
87+
"""Search a stac-geoparquet file with duckdb, returning an arrow table
88+
suitable for loading into (e.g.) GeoPandas.
89+
**stacrs** must be installed with the `arrow` extra, e.g. `python -m pip
90+
install 'stacrs[arrow]'`.
91+
92+
Args:
93+
href: The stac-geoparquet file.
94+
ids: Array of Item ids to return.
95+
collections: Array of one or more Collection IDs that each matching
96+
Item must be in.
97+
intersects: Searches items by performing intersection between their
98+
geometry and provided GeoJSON geometry.
99+
limit: The number of items to return.
100+
offset: The number of items to skip before returning.
101+
bbox: Requested bounding box.
102+
datetime: Single date+time, or a range (`/` separator), formatted to
103+
RFC 3339, section 5.6. Use double dots `..` for open date ranges.
104+
include: fields to include in the response (see [the extension
105+
docs](https://github.com/stac-api-extensions/fields?tab=readme-ov-file#includeexclude-semantics)
106+
for more on the semantics).
107+
exclude: fields to exclude from the response (see [the extension
108+
docs](https://github.com/stac-api-extensions/fields?tab=readme-ov-file#includeexclude-semantics)
109+
for more on the semantics).
110+
sortby: Fields by which to sort results (use `-field` to sort descending).
111+
filter: CQL2 filter expression. Strings will be interpreted as
112+
cql2-text, dictionaries as cql2-json.
113+
query: Additional filtering based on properties. It is recommended
114+
to use filter instead, if possible.
115+
kwargs: Additional parameters to pass in to the search.
116+
117+
Returns:
118+
arro3.core.Table | None: An arrow table, or `None` if no records were returned.
119+
120+
Examples:
121+
>>> table = client.search_to_arrow("data/100-sentinel-2-items.parquet")
122+
>>> data_frame = GeoDataFrame.from_arrow(table)
123+
"""
124+
67125
def get_collections(self, href: str) -> list[dict[str, Any]]:
68126
"""Returns all collections in this stac-geoparquet file.
69127

0 commit comments

Comments
 (0)