diff --git a/Cargo.lock b/Cargo.lock index c67c65c..2b4f936 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -733,9 +733,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.37" +version = "4.5.38" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eccb054f56cbd38340b380d4a8e69ef1f02f1af43db2f0cc817a4774d80ae071" +checksum = "ed93b9805f8ba930df42c2590f05453d5ec36cbb85d018868a5b24d31f6ac000" dependencies = [ "clap_builder", "clap_derive", @@ -743,9 +743,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.37" +version = "4.5.38" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "efd9466fac8543255d3b1fcad4762c5e116ffe808c8a3043d4263cd4fd4862a2" +checksum = "379026ff283facf611b0ea629334361c4211d1b12ee01024eec1591133b04120" dependencies = [ "anstream", "anstyle", @@ -1375,9 +1375,9 @@ dependencies = [ [[package]] name = "geographiclib-rs" -version = "0.2.4" +version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6e5ed84f8089c70234b0a8e0aedb6dc733671612ddc0d37c6066052f9781960" +checksum = "f611040a2bb37eaa29a78a128d1e92a378a03e0b6e66ae27398d42b1ba9a7841" dependencies = [ "libm", ] @@ -2645,7 +2645,7 @@ dependencies = [ [[package]] name = "pgstac" version = "0.3.0" -source = "git+https://github.com/stac-utils/rustac?branch=main#20b2823bb1feda50806f3ff112c3a7acbb6f570e" +source = "git+https://github.com/stac-utils/rustac?branch=main#2e988e14947ebc90ca7043a9faa0066abe9da78b" dependencies = [ "serde", "serde_json", @@ -3348,9 +3348,9 @@ dependencies = [ [[package]] name = "robust" -version = "1.1.0" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cbf4a6aa5f6d6888f39e980649f3ad6b666acdce1d78e95b8a2cb076e687ae30" +checksum = "4e27ee8bb91ca0adcf0ecb116293afa12d393f9c2b9b9cd54d33e8078fe19839" [[package]] name = "rstar" @@ -3382,7 +3382,7 @@ dependencies = [ [[package]] name = "rustac" version = "0.5.3" -source = "git+https://github.com/stac-utils/rustac?branch=main#20b2823bb1feda50806f3ff112c3a7acbb6f570e" +source = "git+https://github.com/stac-utils/rustac?branch=main#2e988e14947ebc90ca7043a9faa0066abe9da78b" dependencies = [ "anyhow", "axum", @@ -3504,9 +3504,9 @@ dependencies = [ [[package]] name = "rustls-webpki" -version = "0.103.2" +version = "0.103.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7149975849f1abb3832b246010ef62ccc80d3a76169517ada7188252b9cfb437" +checksum = "e4a72fe2bcf7a6ac6fd7d0b9e5cb68aeb7d4c0a0271730218b3e92d43b4eb435" dependencies = [ "ring", "rustls-pki-types", @@ -3787,7 +3787,7 @@ checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" [[package]] name = "stac" version = "0.12.0" -source = "git+https://github.com/stac-utils/rustac?branch=main#20b2823bb1feda50806f3ff112c3a7acbb6f570e" +source = "git+https://github.com/stac-utils/rustac?branch=main#2e988e14947ebc90ca7043a9faa0066abe9da78b" dependencies = [ "arrow-array", "arrow-cast", @@ -3822,7 +3822,7 @@ dependencies = [ [[package]] name = "stac-api" version = "0.7.1" -source = "git+https://github.com/stac-utils/rustac?branch=main#20b2823bb1feda50806f3ff112c3a7acbb6f570e" +source = "git+https://github.com/stac-utils/rustac?branch=main#2e988e14947ebc90ca7043a9faa0066abe9da78b" dependencies = [ "async-stream", "chrono", @@ -3847,7 +3847,7 @@ dependencies = [ [[package]] name = "stac-derive" version = "0.2.0" -source = "git+https://github.com/stac-utils/rustac?branch=main#20b2823bb1feda50806f3ff112c3a7acbb6f570e" +source = "git+https://github.com/stac-utils/rustac?branch=main#2e988e14947ebc90ca7043a9faa0066abe9da78b" dependencies = [ "quote", "syn 2.0.101", @@ -3856,7 +3856,7 @@ dependencies = [ [[package]] name = "stac-duckdb" version = "0.1.1" -source = "git+https://github.com/stac-utils/rustac?branch=main#20b2823bb1feda50806f3ff112c3a7acbb6f570e" +source = "git+https://github.com/stac-utils/rustac?branch=main#2e988e14947ebc90ca7043a9faa0066abe9da78b" dependencies = [ "arrow-array", "chrono", @@ -3875,7 +3875,7 @@ dependencies = [ [[package]] name = "stac-server" version = "0.3.4" -source = "git+https://github.com/stac-utils/rustac?branch=main#20b2823bb1feda50806f3ff112c3a7acbb6f570e" +source = "git+https://github.com/stac-utils/rustac?branch=main#2e988e14947ebc90ca7043a9faa0066abe9da78b" dependencies = [ "axum", "bb8", @@ -4313,9 +4313,9 @@ dependencies = [ [[package]] name = "tower-http" -version = "0.6.2" +version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "403fa3b783d4b626a8ad51d766ab03cb6d2dbfc46b1c5d4448395e6628dc9697" +checksum = "0fdb0c213ca27a9f57ab69ddb290fd80d970922355b83ae380b395d3986b8a2e" dependencies = [ "bitflags", "bytes", diff --git a/python/rustac/rustac.pyi b/python/rustac/rustac.pyi index 5241d35..8dde207 100644 --- a/python/rustac/rustac.pyi +++ b/python/rustac/rustac.pyi @@ -1,7 +1,8 @@ """The power of Rust for the Python STAC ecosystem.""" +from collections.abc import AsyncIterator from pathlib import Path -from typing import Any, AsyncIterator, Literal, Optional, Tuple +from typing import Any, Literal import arro3.core @@ -45,18 +46,18 @@ class DuckdbClient: self, href: str, *, - ids: Optional[str | list[str]] = None, - collections: Optional[str | list[str]] = None, - intersects: Optional[str | dict[str, Any]] = None, - limit: Optional[int] = None, - offset: Optional[int] = None, - bbox: Optional[list[float]] = None, - datetime: Optional[str] = None, - include: Optional[str | list[str]] = None, - exclude: Optional[str | list[str]] = None, - sortby: Optional[str | list[str | dict[str, str]]] = None, - filter: Optional[str | dict[str, Any]] = None, - query: Optional[dict[str, Any]] = None, + ids: str | list[str] | None = None, + collections: str | list[str] | None = None, + intersects: str | dict[str, Any] | None = None, + limit: int | None = None, + offset: int | None = None, + bbox: list[float] | None = None, + datetime: str | None = None, + include: str | list[str] | None = None, + exclude: str | list[str] | None = None, + sortby: str | list[str | dict[str, str]] | None = None, + filter: str | dict[str, Any] | None = None, + query: dict[str, Any] | None = None, **kwargs: str, ) -> list[dict[str, Any]]: """Search a stac-geoparquet file with duckdb, returning a list of items. @@ -94,18 +95,18 @@ class DuckdbClient: self, href: str, *, - ids: Optional[str | list[str]] = None, - collections: Optional[str | list[str]] = None, - intersects: Optional[str | dict[str, Any]] = None, - limit: Optional[int] = None, - offset: Optional[int] = None, - bbox: Optional[list[float]] = None, - datetime: Optional[str] = None, - include: Optional[str | list[str]] = None, - exclude: Optional[str | list[str]] = None, - sortby: Optional[str | list[str | dict[str, str]]] = None, - filter: Optional[str | dict[str, Any]] = None, - query: Optional[dict[str, Any]] = None, + ids: str | list[str] | None = None, + collections: str | list[str] | None = None, + intersects: str | dict[str, Any] | None = None, + limit: int | None = None, + offset: int | None = None, + bbox: list[float] | None = None, + datetime: str | None = None, + include: str | list[str] | None = None, + exclude: str | list[str] | None = None, + sortby: str | list[str | dict[str, str]] | None = None, + filter: str | dict[str, Any] | None = None, + query: dict[str, Any] | None = None, **kwargs: str, ) -> arro3.core.Table | None: """Search a stac-geoparquet file with duckdb, returning an arrow table @@ -179,7 +180,7 @@ def collection_from_id_and_items(id: str, items: list[Item]) -> Collection: A STAC collection """ -def migrate(value: dict[str, Any], version: Optional[str] = None) -> dict[str, Any]: +def migrate(value: dict[str, Any], version: str | None = None) -> dict[str, Any]: """ Migrates a STAC dictionary to another version. @@ -264,19 +265,19 @@ def to_arrow( async def search( href: str, *, - intersects: Optional[str | dict[str, Any]] = None, - ids: Optional[str | list[str]] = None, - collections: Optional[str | list[str]] = None, - max_items: Optional[int] = None, - limit: Optional[int] = None, - bbox: Optional[list[float]] = None, - datetime: Optional[str] = None, - include: Optional[str | list[str]] = None, - exclude: Optional[str | list[str]] = None, - sortby: Optional[str | list[str | dict[str, str]]] = None, - filter: Optional[str | dict[str, Any]] = None, - query: Optional[dict[str, Any]] = None, - use_duckdb: Optional[bool] = None, + intersects: str | dict[str, Any] | None = None, + ids: str | list[str] | None = None, + collections: str | list[str] | None = None, + max_items: int | None = None, + limit: int | None = None, + bbox: list[float] | None = None, + datetime: str | None = None, + include: str | list[str] | None = None, + exclude: str | list[str] | None = None, + sortby: str | list[str | dict[str, str]] | None = None, + filter: str | dict[str, Any] | None = None, + query: dict[str, Any] | None = None, + use_duckdb: bool | None = None, **kwargs: str, ) -> list[dict[str, Any]]: """ @@ -333,21 +334,21 @@ async def search_to( outfile: str, href: str, *, - intersects: Optional[str | dict[str, Any]] = None, - ids: Optional[str | list[str]] = None, - collections: Optional[str | list[str]] = None, - max_items: Optional[int] = None, - limit: Optional[int] = None, - bbox: Optional[list[float]] = None, - datetime: Optional[str] = None, - include: Optional[str | list[str]] = None, - exclude: Optional[str | list[str]] = None, - sortby: Optional[str | list[str | dict[str, str]]] = None, - filter: Optional[str | dict[str, Any]] = None, - query: Optional[dict[str, Any]] = None, - format: Optional[str] = None, - options: Optional[list[Tuple[str, str]]] = None, - use_duckdb: Optional[bool] = None, + intersects: str | dict[str, Any] | None = None, + ids: str | list[str] | None = None, + collections: str | list[str] | None = None, + max_items: int | None = None, + limit: int | None = None, + bbox: list[float] | None = None, + datetime: str | None = None, + include: str | list[str] | None = None, + exclude: str | list[str] | None = None, + sortby: str | list[str | dict[str, str]] | None = None, + filter: str | dict[str, Any] | None = None, + query: dict[str, Any] | None = None, + format: str | None = None, + store: ObjectStore | None = None, + use_duckdb: bool | None = None, ) -> int: """ Searches a STAC API server and saves the result to an output file. @@ -385,7 +386,7 @@ async def search_to( It is recommended to use filter instead, if possible. format: The output format. If none, will be inferred from the outfile extension, and if that fails will fall back to compact JSON. - options: Configuration values to pass to the object store backend. + store: An optional [ObjectStore][] use_duckdb: Query with DuckDB. If None and the href has a 'parquet' or 'geoparquet' extension, will be set to True. Defaults to None. diff --git a/src/search.rs b/src/search.rs index 3ca5783..239135c 100644 --- a/src/search.rs +++ b/src/search.rs @@ -2,6 +2,7 @@ use crate::{Error, Json, Result}; use geojson::Geometry; use pyo3::prelude::*; use pyo3::{Bound, FromPyObject, PyErr, PyResult, exceptions::PyValueError, types::PyDict}; +use pyo3_object_store::AnyObjectStore; use stac::Bbox; use stac::Format; use stac_api::{Fields, Filter, Items, Search, Sortby}; @@ -57,7 +58,7 @@ pub fn search<'py>( } #[pyfunction] -#[pyo3(signature = (outfile, href, *, intersects=None, ids=None, collections=None, max_items=None, limit=None, bbox=None, datetime=None, include=None, exclude=None, sortby=None, filter=None, query=None, format=None, options=None, use_duckdb=None, **kwargs))] +#[pyo3(signature = (outfile, href, *, intersects=None, ids=None, collections=None, max_items=None, limit=None, bbox=None, datetime=None, include=None, exclude=None, sortby=None, filter=None, query=None, format=None, store=None, use_duckdb=None, **kwargs))] #[allow(clippy::too_many_arguments)] pub fn search_to<'py>( py: Python<'py>, @@ -76,7 +77,7 @@ pub fn search_to<'py>( filter: Option, query: Option>, format: Option, - options: Option>, + store: Option, use_duckdb: Option, kwargs: Option>, ) -> PyResult> { @@ -106,28 +107,36 @@ pub fn search_to<'py>( pyo3_async_runtimes::tokio::future_into_py(py, async move { let value = search_duckdb(href, search, max_items)?; let count = value.items.len(); - let _ = format - .put_opts( - outfile, - serde_json::to_value(value).map_err(Error::from)?, - options.unwrap_or_default(), - ) - .await - .map_err(Error::from)?; + let value = serde_json::to_value(value).map_err(Error::from)?; + if let Some(store) = store { + format + .put_store(store.into_dyn(), outfile, value) + .await + .map_err(Error::from)?; + } else { + format + .put_opts(outfile, value, [] as [(&str, &str); 0]) + .await + .map_err(Error::from)?; + } Ok(count) }) } else { pyo3_async_runtimes::tokio::future_into_py(py, async move { let value = search_api(href, search, max_items).await?; let count = value.items.len(); - let _ = format - .put_opts( - outfile, - serde_json::to_value(value).map_err(Error::from)?, - options.unwrap_or_default(), - ) - .await - .map_err(Error::from)?; + let value = serde_json::to_value(value).map_err(Error::from)?; + if let Some(store) = store { + format + .put_store(store.into_dyn(), outfile, value) + .await + .map_err(Error::from)?; + } else { + format + .put_opts(outfile, value, [] as [(&str, &str); 0]) + .await + .map_err(Error::from)?; + } Ok(count) }) } diff --git a/tests/test_search.py b/tests/test_search.py index 287aa95..d8c33a7 100644 --- a/tests/test_search.py +++ b/tests/test_search.py @@ -5,6 +5,7 @@ import pyarrow.parquet import rustac import stac_geoparquet.arrow +from rustac.store import MemoryStore async def test_search() -> None: @@ -64,3 +65,13 @@ async def test_sortby_list_of_dict() -> None: async def test_proj_geometry(maxar_items: list[dict[str, Any]], tmp_path: Path) -> None: await rustac.write(str(tmp_path / "out.parquet"), maxar_items) + + +async def test_search_to_store(data: Path) -> None: + store = MemoryStore() + count = await rustac.search_to( + "items.json", str(data / "100-sentinel-2-items.parquet"), store=store + ) + assert count == 100 + item_collection = await rustac.read("items.json", store=store) + assert len(item_collection["features"]) == 100