Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),

- `type` field to geoparquet writes ([#136](https://github.com/stac-utils/rustac-py/pull/136), <https://github.com/stac-utils/rustac/pull/736>)
- `parquet_compression` argument to `write` and `search_to` ([#150](https://github.com/stac-utils/rustac-py/pull/150))
- `iter_search` ([#151](https://github.com/stac-utils/rustac-py/pull/151))

### Fixed

Expand Down
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ pyo3-log = "0.12.1"
tracing = "0.1.41"
pyo3-object_store = "0.2.0"
parquet = "55.1.0"
futures-core = "0.3.31"
futures-util = "0.3.31"

[build-dependencies]
cargo-lock = "10"
Expand Down
66 changes: 65 additions & 1 deletion python/rustac/rustac.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,7 @@ async def search(
**kwargs: str,
) -> list[dict[str, Any]]:
"""
Searches a STAC API server.
Searches a STAC API server or a stac-geoparquet file.

Args:
href: The STAC API to search.
Expand Down Expand Up @@ -333,6 +333,70 @@ async def search(
... )
"""

async def iter_search(
href: str,
*,
intersects: str | dict[str, Any] | None = None,
ids: str | list[str] | None = None,
collections: str | list[str] | None = None,
max_items: int | None = None,
limit: int | None = None,
bbox: list[float] | None = None,
datetime: str | None = None,
include: str | list[str] | None = None,
exclude: str | list[str] | None = None,
sortby: str | list[str | dict[str, str]] | None = None,
filter: str | dict[str, Any] | None = None,
query: dict[str, Any] | None = None,
use_duckdb: bool | None = None,
**kwargs: str,
) -> AsyncIterator[dict[str, Any]]:
"""
Searches a STAC API server and iterates over its items.

Args:
href: The STAC API to search.
intersects: Searches items
by performing intersection between their geometry and provided GeoJSON
geometry.
ids: Array of Item ids to return.
collections: Array of one or more Collection IDs that
each matching Item must be in.
limit: The page size returned from the server.
bbox: Requested bounding box.
datetime: Single date+time, or a range (`/` separator),
formatted to RFC 3339, section 5.6. Use double dots .. for open
date ranges.
include: fields to include in the response (see [the
extension
docs](https://github.com/stac-api-extensions/fields?tab=readme-ov-file#includeexclude-semantics))
for more on the semantics).
exclude: fields to exclude from the response (see [the
extension
docs](https://github.com/stac-api-extensions/fields?tab=readme-ov-file#includeexclude-semantics))
for more on the semantics).
sortby: Fields by which to sort results (use `-field` to sort descending).
filter: CQL2 filter expression. Strings
will be interpreted as cql2-text, dictionaries as cql2-json.
query: Additional filtering based on properties.
It is recommended to use filter instead, if possible.
kwargs: Additional parameters to pass in to the search.

Returns:
An iterator over STAC items

Examples:
>>> search = await rustac.iter_search(
... "https://landsatlook.usgs.gov/stac-server",
... collections=["landsat-c2l2-sr"],
... intersects={"type": "Point", "coordinates": [-105.119, 40.173]},
... sortby="-properties.datetime",
... )
>>> async for item in search:
... items.append(item)
...
"""

async def search_to(
outfile: str,
href: str,
Expand Down
1 change: 1 addition & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ fn rustac(py: Python<'_>, m: &Bound<'_, PyModule>) -> PyResult<()> {
)?)?;
m.add_function(wrap_pyfunction!(migrate::migrate, m)?)?;
m.add_function(wrap_pyfunction!(read::read, m)?)?;
m.add_function(wrap_pyfunction!(search::iter_search, m)?)?;
m.add_function(wrap_pyfunction!(search::search, m)?)?;
m.add_function(wrap_pyfunction!(search::search_to, m)?)?;
m.add_function(wrap_pyfunction!(version::sha, m)?)?;
Expand Down
98 changes: 95 additions & 3 deletions src/search.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,79 @@
use crate::{Error, Json, Result};
use futures_core::Stream;
use futures_core::stream::BoxStream;
use futures_util::StreamExt;
use geojson::Geometry;
use pyo3::prelude::*;
use pyo3::{Bound, FromPyObject, PyErr, PyResult, exceptions::PyValueError, types::PyDict};
use pyo3_object_store::AnyObjectStore;
use serde_json::{Map, Value};
use stac::Bbox;
use stac_api::{Fields, Filter, Items, Search, Sortby};
use stac_api::{Client, Fields, Filter, Items, Search, Sortby};
use stac_io::{Format, StacStore};
use std::sync::Arc;
use tokio::{pin, sync::Mutex};

#[pyclass]
struct SearchIterator(Arc<Mutex<BoxStream<'static, stac_api::Result<Map<String, Value>>>>>);

#[pymethods]
impl SearchIterator {
fn __aiter__(slf: PyRef<'_, Self>) -> PyRef<'_, Self> {
slf
}

fn __anext__<'py>(&self, py: Python<'py>) -> PyResult<Bound<'py, PyAny>> {
let stream = self.0.clone();
pyo3_async_runtimes::tokio::future_into_py(py, async move {
let mut stream = stream.lock().await;
if let Some(result) = stream.next().await {
let item = result.map_err(Error::from)?;
Ok(Some(Json(item)))
} else {
Ok(None)
}
})
}
}

#[pyfunction]
#[pyo3(signature = (href, *, intersects=None, ids=None, collections=None, limit=None, bbox=None, datetime=None, include=None, exclude=None, sortby=None, filter=None, query=None, **kwargs))]
#[allow(clippy::too_many_arguments)]
pub fn iter_search<'py>(
py: Python<'py>,
href: String,
intersects: Option<StringOrDict>,
ids: Option<StringOrList>,
collections: Option<StringOrList>,
limit: Option<u64>,
bbox: Option<Vec<f64>>,
datetime: Option<String>,
include: Option<StringOrList>,
exclude: Option<StringOrList>,
sortby: Option<PySortby<'py>>,
filter: Option<StringOrDict>,
query: Option<Bound<'py, PyDict>>,
kwargs: Option<Bound<'_, PyDict>>,
) -> PyResult<Bound<'py, PyAny>> {
let search = build(
intersects,
ids,
collections,
limit,
bbox,
datetime,
include,
exclude,
sortby,
filter,
query,
kwargs,
)?;
pyo3_async_runtimes::tokio::future_into_py(py, async move {
let stream = iter_search_api(href, search).await?;
Ok(SearchIterator(Arc::new(Mutex::new(Box::pin(stream)))))
})
}

#[pyfunction]
#[pyo3(signature = (href, *, intersects=None, ids=None, collections=None, max_items=None, limit=None, bbox=None, datetime=None, include=None, exclude=None, sortby=None, filter=None, query=None, use_duckdb=None, **kwargs))]
Expand Down Expand Up @@ -165,8 +233,32 @@ async fn search_api(
search: Search,
max_items: Option<usize>,
) -> Result<stac_api::ItemCollection> {
let value = stac_api::client::search(&href, search, max_items).await?;
Ok(value)
let stream = iter_search_api(href, search).await?;
pin!(stream);
let mut items = if let Some(max_items) = max_items {
Vec::with_capacity(max_items)
} else {
Vec::new()
};
while let Some(result) = stream.next().await {
let item = result?;
items.push(item);
if let Some(max_items) = max_items {
if items.len() >= max_items {
break;
}
}
}
Ok(items.into())
}

/// Builds a [Client] for `href` and returns the stream produced by running
/// `search` against it.
///
/// Errors from creating the client or from starting the search are propagated
/// to the caller.
async fn iter_search_api(
    href: String,
    search: Search,
) -> Result<impl Stream<Item = stac_api::Result<Map<String, Value>>>> {
    let api = Client::new(&href)?;
    Ok(api.search(search).await?)
}

/// Creates a [Search] from Python arguments.
Expand Down
9 changes: 9 additions & 0 deletions tests/test_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,3 +113,12 @@ async def test_cql(data: Path) -> None:
},
max_items=1,
)


async def test_iter_search() -> None:
    """Iterates a live STAC API search and stops after the first ten items."""
    items = []
    search = await rustac.iter_search("https://landsatlook.usgs.gov/stac-server")
    async for item in search:
        items.append(item)
        if len(items) >= 10:
            break
    # Without this check the test would pass even if nothing was yielded.
    assert len(items) == 10
Loading