diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7b069bd..5547ec8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 
 ## [Unreleased]
 
+### Added
+
+- DuckDB client ([#15](https://github.com/gadomski/stacrs/pull/15))
+
 ## [0.3.0] - 2024-11-21
 
 ### Removed
diff --git a/data/100-sentinel-2-items.parquet b/data/100-sentinel-2-items.parquet
new file mode 100644
index 0000000..e0b6b0a
Binary files /dev/null and b/data/100-sentinel-2-items.parquet differ
diff --git a/src/duckdb.rs b/src/duckdb.rs
new file mode 100644
index 0000000..76d654b
--- /dev/null
+++ b/src/duckdb.rs
@@ -0,0 +1,66 @@
+use crate::{Error, Result};
+use pyo3::{exceptions::PyException, prelude::*, types::PyDict};
+use stac_api::python::{StringOrDict, StringOrList};
+use stac_duckdb::Client;
+use std::sync::Mutex;
+
+/// A client for querying stac-geoparquet with DuckDB.
+#[pyclass(frozen)]
+pub struct DuckdbClient(Mutex<Client>);
+
+#[pymethods]
+impl DuckdbClient {
+    /// Creates a new client.
+    #[new]
+    fn new() -> Result<Self> {
+        let client = Client::new()?;
+        Ok(DuckdbClient(Mutex::new(client)))
+    }
+
+    /// Searches the stac-geoparquet file at `href` and returns the result as a dictionary.
+    ///
+    /// All search arguments are forwarded unchanged to `stac_api::python::search`.
+    #[pyo3(signature = (href, *, intersects=None, ids=None, collections=None, limit=None, bbox=None, datetime=None, include=None, exclude=None, sortby=None, filter=None, query=None, **kwargs))]
+    fn search<'py>(
+        &self,
+        py: Python<'py>,
+        href: String,
+        intersects: Option<StringOrDict>,
+        ids: Option<StringOrList>,
+        collections: Option<StringOrList>,
+        limit: Option<u64>,
+        bbox: Option<Vec<f64>>,
+        datetime: Option<String>,
+        include: Option<StringOrList>,
+        exclude: Option<StringOrList>,
+        sortby: Option<StringOrList>,
+        filter: Option<StringOrDict>,
+        query: Option<Bound<'py, PyDict>>,
+        kwargs: Option<Bound<'py, PyDict>>,
+    ) -> PyResult<Bound<'py, PyDict>> {
+        let search = stac_api::python::search(
+            intersects,
+            ids,
+            collections,
+            limit,
+            bbox,
+            datetime,
+            include,
+            exclude,
+            sortby,
+            filter,
+            query,
+            kwargs,
+        )?;
+        let item_collection = {
+            // Scope the guard so the lock is released before converting to Python.
+            let client = self
+                .0
+                .lock()
+                .map_err(|err| PyException::new_err(err.to_string()))?;
+            client.search(&href, search).map_err(Error::from)?
+        };
+        let dict = pythonize::pythonize(py, &item_collection)?;
+        dict.extract()
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
index ca07726..ccbc023 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,5 +1,6 @@
 #![deny(unused_crate_dependencies, warnings)]
 
+mod duckdb;
 mod error;
 mod migrate;
 mod read;
@@ -7,7 +8,7 @@ mod search;
 mod version;
 mod write;
 
-use duckdb as _;
+use ::duckdb as _;
 use error::Error;
 use pyo3::prelude::*;
 
@@ -16,6 +17,7 @@ type Result<T> = std::result::Result<T, Error>;
 /// A collection of functions for working with STAC, using Rust under the hood.
 #[pymodule]
 fn stacrs(m: &Bound<'_, PyModule>) -> PyResult<()> {
+    m.add_class::<duckdb::DuckdbClient>()?;
     m.add_function(wrap_pyfunction!(migrate::migrate, m)?)?;
     m.add_function(wrap_pyfunction!(migrate::migrate_href, m)?)?;
     m.add_function(wrap_pyfunction!(read::read, m)?)?;
diff --git a/stacrs.pyi b/stacrs.pyi
index 2c7298f..e8d688a 100644
--- a/stacrs.pyi
+++ b/stacrs.pyi
@@ -1,5 +1,32 @@
 from typing import Any, Optional, Tuple
 
+class DuckdbClient:
+    """A client for querying stac-geoparquet with DuckDB."""
+
+    def search(
+        self,
+        href: str,
+        *,
+        intersects: Optional[str | dict[str, Any]] = None,
+        ids: Optional[str | list[str]] = None,
+        collections: Optional[str | list[str]] = None,
+        max_items: Optional[int] = None,
+        limit: Optional[int] = None,
+        bbox: Optional[list[float]] = None,
+        datetime: Optional[str] = None,
+        include: Optional[str | list[str]] = None,
+        exclude: Optional[str | list[str]] = None,
+        sortby: Optional[str | list[str]] = None,
+        filter: Optional[str | dict[str, Any]] = None,
+        query: Optional[dict[str, Any]] = None,
+        **kwargs: Any,
+    ) -> dict[str, Any]:
+        """Search a stac-geoparquet file with DuckDB.
+
+        Returns:
+            The search results as an item collection dictionary.
+        """
+
 def migrate_href(href: str, version: Optional[str] = None) -> dict[str, Any]:
     """
     Migrates a STAC dictionary at the given href to another version.
diff --git a/tests/test_duckdb.py b/tests/test_duckdb.py
new file mode 100644
index 0000000..2ede109
--- /dev/null
+++ b/tests/test_duckdb.py
@@ -0,0 +1,31 @@
+import pytest
+from stacrs import DuckdbClient
+
+
+@pytest.fixture
+def client() -> DuckdbClient:
+    """Provide a newly constructed DuckDB client to each test."""
+    return DuckdbClient()
+
+
+def test_search(client: DuckdbClient) -> None:
+    """Searching the extended-item file yields exactly one feature."""
+    features = client.search("data/extended-item.parquet")["features"]
+    assert len(features) == 1
+
+
+def test_search_offset(client: DuckdbClient) -> None:
+    """Paging with ``offset`` and ``limit=1`` returns the expected item ids."""
+    href = "data/100-sentinel-2-items.parquet"
+
+    first_page = client.search(href, offset=0, limit=1)
+    assert (
+        first_page["features"][0]["id"]
+        == "S2B_MSIL2A_20241203T174629_R098_T13TDE_20241203T211406"
+    )
+
+    second_page = client.search(href, offset=1, limit=1)
+    assert (
+        second_page["features"][0]["id"]
+        == "S2A_MSIL2A_20241201T175721_R141_T13TDE_20241201T213150"
+    )