diff --git a/docs/api/stac.md b/docs/api/stac.md new file mode 100644 index 0000000..46dff3e --- /dev/null +++ b/docs/api/stac.md @@ -0,0 +1,8 @@ +# STAC + +Typed dictionaries for STAC entities. + +::: rustac.Catalog +::: rustac.Collection +::: rustac.Item +::: rustac.ItemCollection diff --git a/mkdocs.yml b/mkdocs.yml index af6f257..00d4450 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -25,6 +25,7 @@ nav: - migrate: api/migrate.md - read: api/read.md - search: api/search.md + - stac: api/stac.md - version: api/version.md - walk: api/walk.md - write: api/write.md diff --git a/pyproject.toml b/pyproject.toml index a1ae72b..e90b783 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -46,7 +46,7 @@ asyncio_mode = "auto" asyncio_default_fixture_loop_scope = "function" [tool.ruff] -exclude = ["docs/examples/example_*.py"] +exclude = ["python/rustac/__init__.py", "docs/examples/example_*.py"] [dependency-groups] dev = [ @@ -87,7 +87,8 @@ requires = ["maturin>=1.7,<2.0"] build-backend = "maturin" [tool.maturin] +python-source = "python" strip = true -opt-level = "z" # TODO compare with "s" +opt-level = "z" # TODO compare with "s" lto = true codegen-units = 1 diff --git a/python/rustac/__init__.py b/python/rustac/__init__.py new file mode 100644 index 0000000..4bb3ffc --- /dev/null +++ b/python/rustac/__init__.py @@ -0,0 +1,253 @@ +from __future__ import annotations + +from .rustac import * +from typing import Any, Literal, TypedDict + + +class Catalog(TypedDict): + """A STAC Catalog object represents a logical group of other Catalog, Collection, and Item objects.""" + + type: str + """Set to Catalog if this Catalog only implements the Catalog spec.""" + + stac_version: str + """The STAC version the Catalog implements.""" + + stac_extensions: list[str] | None + """A list of extension identifiers the Catalog implements.""" + + id: str + """Identifier for the Catalog.""" + + title: str | None + """A short descriptive one-line title for the Catalog.""" + + description: str + """Detailed 
multi-line description to fully explain the Catalog. + + CommonMark 0.29 syntax MAY be used for rich text representation.""" + + links: list[Link] + """A list of references to other documents.""" + +class Collection(TypedDict): + """The STAC Collection Specification defines a set of common fields to describe a group of Items that share properties and metadata.""" + + type: str + """Must be set to Collection to be a valid Collection.""" + + stac_version: str + """The STAC version the Collection implements.""" + + stac_extensions: list[str] | None + """A list of extension identifiers the Collection implements.""" + + id: str + """Identifier for the Collection that is unique across all collections in the root catalog.""" + + title: str | None + """A short descriptive one-line title for the Collection.""" + + description: str + """Detailed multi-line description to fully explain the Collection. + + CommonMark 0.29 syntax MAY be used for rich text representation.""" + + keywords: list[str] | None + """List of keywords describing the Collection.""" + + license: str + """License(s) of the data collection as SPDX License identifier, SPDX License expression, or `other`.""" + + providers: list[Provider] | None + """A list of providers, which may include all organizations capturing or processing the data or the hosting provider.""" + + extent: Extent + """Spatial and temporal extents.""" + + summaries: dict[str, Any] | None + """A map of property summaries, either a set of values, a range of values or a JSON Schema.""" + + links: list[Link] + """A list of references to other documents.""" + + assets: dict[str, Asset] | None + """Dictionary of asset objects that can be downloaded, each with a unique key.""" + + item_assets: dict[str, ItemAsset] | None + """A dictionary of assets that can be found in member Items.""" + +class Provider(TypedDict): + """A provider is any of the organizations that captures or processes the content of the Collection and therefore influences the data 
offered by this Collection.""" + + name: str + """The name of the organization or the individual.""" + + description: str | None + """Multi-line description to add further provider information such as processing details for processors and producers, hosting details for hosts or basic contact information. + + CommonMark 0.29 syntax MAY be used for rich text representation.""" + + roles: list[ + Literal["licensor"] + | Literal["producer"] + | Literal["processor"] + | Literal["host"] + ] + """Roles of the provider.""" + + url: str | None + """Homepage on which the provider describes the dataset and publishes contact information.""" + +class Extent(TypedDict): + """The object describes the spatio-temporal extents of the Collection.""" + + spatial: SpatialExtent + """Potential spatial extents covered by the Collection.""" + + temporal: TemporalExtent + """Potential temporal extents covered by the Collection.""" + +class SpatialExtent(TypedDict): + """The object describes the spatial extents of the Collection.""" + + bbox: list[list[int | float]] + """Potential spatial extents covered by the Collection.""" + +class TemporalExtent(TypedDict): + """The object describes the temporal extents of the Collection.""" + + interval: list[list[str | None]] + """Potential temporal extents covered by the Collection.""" + +class ItemAsset(TypedDict): + """An Item Asset Object defined at the Collection level is nearly the same as the Asset Object in Items, except for two differences. + + The href field is not required, because Item Asset Definitions don't point to any data by themselves, but at least two other fields must be present.""" + + title: str | None + """The displayed title for clients and users.""" + + description: str | None + """A description of the Asset providing additional details, such as how it was processed or created. 
+ + CommonMark 0.29 syntax MAY be used for rich text representation.""" + + type: str | None + """Media type of the asset.""" + + roles: list[str] | None + """The semantic roles of the asset, similar to the use of rel in links.""" + +class Item(TypedDict): + """An Item is a GeoJSON Feature augmented with foreign members relevant to a STAC object.""" + + type: str + """Type of the GeoJSON Object. MUST be set to Feature.""" + + stac_version: str + """The STAC version the Item implements.""" + + stac_extensions: list[str] | None + """A list of extensions the Item implements.""" + + id: str + """Provider identifier. The ID should be unique within the Collection that contains the Item.""" + + geometry: dict[str, Any] | None + """Defines the full footprint of the asset represented by this item, formatted according to RFC 7946, section 3.1 if a geometry is provided or section 3.2 if no geometry is provided.""" + + bbox: list[int | float] | None + """REQUIRED if geometry is not null, prohibited if geometry is null. + + Bounding Box of the asset represented by this Item, formatted according to RFC 7946, section 5.""" + + properties: Properties + """A dictionary of additional metadata for the Item.""" + + links: list[Link] + """List of link objects to resources and related URLs. + + See the best practices for details on when the use of self links is strongly recommended.""" + + assets: dict[str, Asset] + """Dictionary of asset objects that can be downloaded, each with a unique key.""" + + collection: str | None + """The id of the STAC Collection this Item references. + + This field is required if a link with a collection relation type is present and is not allowed otherwise.""" + +class Properties(TypedDict): + """Additional metadata fields can be added to the GeoJSON Object Properties.""" + + datetime: str | None + """The searchable date and time of the assets, which must be in UTC. + + It is formatted according to RFC 3339, section 5.6. 
null is allowed, but requires start_datetime and end_datetime from common metadata to be set.""" + +class Link(TypedDict): + """This object describes a relationship with another entity. + + Data providers are advised to be liberal with the links section, to describe + things like the Catalog an Item is in, related Items, parent or child Items + (modeled in different ways, like an 'acquisition' or derived data).""" + + href: str + """The actual link in the format of a URL. + + Relative and absolute links are both allowed. Trailing slashes are significant.""" + + rel: str + """Relationship between the current document and the linked document.""" + + type: str | None + """Media type of the referenced entity.""" + + title: str | None + """A human-readable title to be used in rendered displays of the link.""" + + method: str | None + """The HTTP method that shall be used for the request to the target resource, in uppercase. + + Defaults to GET.""" + + headers: dict[str, str | list[str]] | None + """The HTTP headers to be sent for the request to the target resource.""" + + body: Any | None + """The HTTP body to be sent to the target resource.""" + +class Asset(TypedDict): + """An Asset is an object that contains a URI to data associated with the Item that can be downloaded or streamed. + + It is allowed to add additional fields.""" + + href: str + """URI to the asset object. Relative and absolute URIs are both allowed. Trailing slashes are significant.""" + + title: str | None + """The displayed title for clients and users.""" + + description: str | None + """A description of the Asset providing additional details, such as how it was processed or created. + + CommonMark 0.29 syntax MAY be used for rich text representation.""" + + type: str | None + """Media type of the asset. 
+ + See the common media types in the best practice doc for commonly used asset types.""" + + roles: list[str] | None + """The semantic roles of the asset, similar to the use of rel in links.""" + +class ItemCollection(TypedDict): + """A GeoJSON feature collection of STAC Items.""" + + features: list[Item] + """STAC items.""" + +__doc__ = rustac.__doc__ +if hasattr(rustac, "__all__"): + __all__ = rustac.__all__ diff --git a/python/rustac/py.typed b/python/rustac/py.typed new file mode 100644 index 0000000..e69de29 diff --git a/rustac.pyi b/python/rustac/rustac.pyi similarity index 97% rename from rustac.pyi rename to python/rustac/rustac.pyi index aa76584..1134685 100644 --- a/rustac.pyi +++ b/python/rustac/rustac.pyi @@ -1,7 +1,11 @@ +"""The power of Rust for the Python STAC ecosystem.""" + from typing import Any, AsyncIterator, Literal, Optional, Tuple import arro3.core +from rustac import Catalog, Collection, Item, ItemCollection + class RustacError(Exception): """A package-specific exception.""" @@ -144,7 +148,7 @@ class DuckdbClient: >>> data_frame = GeoDataFrame.from_arrow(table) """ - def get_collections(self, href: str) -> list[dict[str, Any]]: + def get_collections(self, href: str) -> list[Collection]: """Returns all collections in this stac-geoparquet file. These collections will be auto-generated from the STAC items, one @@ -215,7 +219,7 @@ async def read( def from_arrow( table: arro3.core.Table, -) -> dict[str, Any]: +) -> ItemCollection: """ Converts an [arro3.core.Table][] to a STAC item collection. @@ -229,7 +233,7 @@ def from_arrow( """ def to_arrow( - items: list[dict[str, Any]] | dict[str, Any], + items: list[Item] | ItemCollection, ) -> arro3.core.Table: """ Converts items to an [arro3.core.Table][]. @@ -260,7 +264,7 @@ async def search( query: Optional[dict[str, Any]] = None, use_duckdb: Optional[bool] = None, **kwargs: str, -) -> list[dict[str, Any]]: +) -> dict[str, Any]: """ Searches a STAC API server. 
@@ -299,10 +303,10 @@ async def search( kwargs: Additional parameters to pass in to the search. Returns: - A list of the returned STAC items. + A feature collection of the returned STAC items. Examples: - >>> items = await rustac.search( + >>> item_collection = await rustac.search( ... "https://landsatlook.usgs.gov/stac-server", ... collections=["landsat-c2l2-sr"], ... intersects={"type": "Point", "coordinates": [-105.119, 40.173]}, @@ -387,7 +391,7 @@ async def search_to( def walk( container: dict[str, Any], -) -> AsyncIterator[tuple[dict[str, Any], list[dict[str, Any]], list[dict[str, Any]]]]: +) -> AsyncIterator[tuple[Catalog | Collection, list[Catalog | Collection], list[Item]]]: """Recursively walks a STAC catalog or collection breadth-first. Args: diff --git a/tests/conftest.py b/tests/conftest.py index b12df5b..f6c4525 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,8 +1,8 @@ import json from pathlib import Path -from typing import Any import pytest +from rustac import Item @pytest.fixture @@ -26,6 +26,6 @@ def data(root: Path) -> Path: @pytest.fixture -def item(examples: Path) -> dict[str, Any]: +def item(examples: Path) -> Item: with open(examples / "simple-item.json") as f: return json.load(f) diff --git a/tests/test_arrow.py b/tests/test_arrow.py index 3ecbed3..a63d1f5 100644 --- a/tests/test_arrow.py +++ b/tests/test_arrow.py @@ -1,13 +1,12 @@ -from typing import Any - import pytest import rustac from geopandas import GeoDataFrame +from rustac import Item pytest.importorskip("arro3.core") -def test_to_arrow(item: dict[str, Any]) -> None: +def test_to_arrow(item: Item) -> None: table = rustac.to_arrow([item]) data_frame = GeoDataFrame.from_arrow(table) assert len(data_frame) == 1