Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),

## [Unreleased]

### Added

- Create an item collection from an arrow table ([#57](https://github.com/stac-utils/stacrs/pull/57))

## [0.5.6-beta.0] - 2025-02-22

### Added
Expand Down
66 changes: 34 additions & 32 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 3 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,10 @@ name = "stacrs"
crate-type = ["cdylib"]

[dependencies]
clap = "4.5.30"
clap = "4.5.31"
geojson = "0.24.1"
pyo3 = { version = "0.23.4", features = ["extension-module"] }
geoarrow = "0.4.0-beta.3"
pyo3 = { version = "0.23.5", features = ["extension-module"] }
pyo3-async-runtimes = { version = "0.23.0", features = [
"tokio",
"tokio-runtime",
Expand Down
546,698 changes: 546,619 additions & 79 deletions docs/example.ipynb

Large diffs are not rendered by default.

6 changes: 6 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ dependencies = []

[project.optional-dependencies]
arrow = ["arro3-core>=0.4.5"]
docs = [
"jinja2>=3.1.4",
]

[project.scripts]
stacrs = "stacrs:main"
Expand Down Expand Up @@ -58,6 +61,9 @@ dev = [
"stac-geoparquet>=0.6.0",
]
docs = [
"contextily>=1.6.2",
"humanize>=4.12.1",
"jinja2>=3.1.4",
"mike>=2.1.3",
"mkdocs-jupyter>=0.25.1",
"mkdocs-material[imaging]>=9.5.45",
Expand Down
55 changes: 55 additions & 0 deletions src/arrow.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
use crate::{Error, Json, Result};
use geoarrow::table::Table;
use pyo3::{prelude::*, IntoPyObjectExt};
use pyo3_arrow::PyTable;
use serde_json::Value;
use stac::{Item, ItemCollection};

#[pyfunction]
pub fn from_arrow(py: Python<'_>, table: PyTable) -> PyResult<Bound<PyAny>> {
let (record_batches, mut schema) = table.into_inner();
let record_batches = record_batches
.into_iter()
.map(|record_batch| {
let record_batch = stac::geoarrow::with_native_geometry(record_batch, "geometry")?;
Ok(record_batch)
})
.collect::<Result<Vec<_>>>()?;
if !record_batches.is_empty() {
schema = record_batches[0].schema();
}
let table = Table::try_new(record_batches, schema).map_err(Error::from)?;
let item_collection = stac::geoarrow::from_table(table).map_err(Error::from)?;
let item_collection = Json(item_collection).into_pyobject(py)?;
Ok(item_collection)
}

#[pyfunction]
pub fn to_arrow(py: Python<'_>, items: Bound<PyAny>) -> PyResult<PyObject> {
let value: Value = pythonize::depythonize(&items)?;
let item_collection = if let Value::Array(array) = value {
let items = array
.into_iter()
.map(|value| serde_json::from_value::<Item>(value).map_err(Error::from))
.collect::<Result<Vec<_>>>()?;
ItemCollection::from(items)
} else {
serde_json::from_value(value).map_err(Error::from)?
};
// TODO we might want to just allow use to go WKB right when we got to table?
let (record_batches, mut schema) = stac::geoarrow::to_table(item_collection)
.map_err(Error::from)?
.into_inner();
let record_batches = record_batches
.into_iter()
.map(|record_batch| {
stac::geoarrow::with_wkb_geometry(record_batch, "geometry").map_err(Error::from)
})
.collect::<Result<Vec<_>>>()?;
if !record_batches.is_empty() {
schema = record_batches[0].schema();
}
let table = PyTable::try_new(record_batches, schema)?;
let table = table.to_arro3(py)?;
Ok(table.into_py_any(py)?)
}
3 changes: 3 additions & 0 deletions src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ pub enum Error {
#[error(transparent)]
Geojson(#[from] geojson::Error),

#[error(transparent)]
Geoarrow(#[from] geoarrow::error::GeoArrowError),

#[error(transparent)]
Io(#[from] std::io::Error),

Expand Down
3 changes: 3 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#![deny(unused_crate_dependencies)]

mod arrow;
mod cli;
mod duckdb;
mod error;
Expand All @@ -20,6 +21,8 @@ fn stacrs(py: Python<'_>, m: &Bound<'_, PyModule>) -> PyResult<()> {

m.add_class::<duckdb::DuckdbClient>()?;

m.add_function(wrap_pyfunction!(arrow::from_arrow, m)?)?;
m.add_function(wrap_pyfunction!(arrow::to_arrow, m)?)?;
m.add_function(wrap_pyfunction!(cli::main, m)?)?;
m.add_function(wrap_pyfunction!(migrate::migrate, m)?)?;
m.add_function(wrap_pyfunction!(migrate::migrate_href, m)?)?;
Expand Down
30 changes: 30 additions & 0 deletions stacrs.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,36 @@ async def read(
>>> item = await stacrs.read("item.json")
"""

def from_arrow(
table: arro3.core.Table,
) -> dict[str, Any]:
"""
Converts an [arro3.core.Table][] to a STAC item collection.

Requires **stacrs** to be installed with the `arrow` extra.

Args:
table: The table

Returns:
dict[str, Any]: The STAC item collection
"""

def to_arrow(
items: list[dict[str, Any]] | dict[str, Any],
) -> arro3.core.Table:
"""
Converts items to an [arro3.core.Table][].

Requires **stacrs** to be installed with the `arrow` extra.

Args:
items: Either an iterable of items or an item collection

Returns:
arro3.core.Table: The table
"""

async def search(
href: str,
*,
Expand Down
Loading
Loading