diff --git a/python/Cargo.lock b/python/Cargo.lock index 019adf1..53f3ead 100644 --- a/python/Cargo.lock +++ b/python/Cargo.lock @@ -992,6 +992,7 @@ version = "0.1.0-beta.2" dependencies = [ "async-tiff", "bytes", + "futures", "object_store", "pyo3", "pyo3-async-runtimes", diff --git a/python/Cargo.toml b/python/Cargo.toml index ade899c..b359f71 100644 --- a/python/Cargo.toml +++ b/python/Cargo.toml @@ -19,6 +19,7 @@ crate-type = ["cdylib"] [dependencies] async-tiff = { path = "../" } bytes = "1.10.1" +futures = "0.3.31" object_store = "0.12" pyo3 = { version = "0.23.0", features = ["macros"] } pyo3-async-runtimes = "0.23" diff --git a/python/docs/api/tiff.md b/python/docs/api/tiff.md index 1059366..9ff50d7 100644 --- a/python/docs/api/tiff.md +++ b/python/docs/api/tiff.md @@ -3,3 +3,4 @@ ::: async_tiff.TIFF options: show_if_no_docstring: true +::: async_tiff.ObspecReader diff --git a/python/pyproject.toml b/python/pyproject.toml index d5da812..1cfc990 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -5,7 +5,7 @@ build-backend = "maturin" [project] name = "async-tiff" requires-python = ">=3.9" -dependencies = [] +dependencies = ["obspec>=0.1.0-beta.3"] dynamic = ["version"] classifiers = [ "Programming Language :: Rust", diff --git a/python/python/async_tiff/_async_tiff.pyi b/python/python/async_tiff/_async_tiff.pyi index f5babf4..05f5bc8 100644 --- a/python/python/async_tiff/_async_tiff.pyi +++ b/python/python/async_tiff/_async_tiff.pyi @@ -3,5 +3,6 @@ from ._decoder import DecoderRegistry as DecoderRegistry from ._geo import GeoKeyDirectory as GeoKeyDirectory from ._ifd import ImageFileDirectory as ImageFileDirectory from ._thread_pool import ThreadPool as ThreadPool +from ._tiff import ObspecInput as ObspecInput from ._tiff import TIFF as TIFF from ._tile import Tile as Tile diff --git a/python/python/async_tiff/_tiff.pyi b/python/python/async_tiff/_tiff.pyi index 0bf35ba..3e32f97 100644 --- a/python/python/async_tiff/_tiff.pyi +++ b/python/python/async_tiff/_tiff.pyi @@ -1,15 +1,21 @@ -import obstore +from typing import Protocol from ._tile import Tile from ._ifd import ImageFileDirectory from .store import ObjectStore +# Fix exports +from obspec._get import GetRangeAsync, GetRangesAsync + +class ObspecInput(GetRangeAsync, GetRangesAsync, Protocol): + """Supported obspec input to reader.""" + class TIFF: @classmethod async def open( cls, path: str, *, - store: obstore.store.ObjectStore | ObjectStore, + store: ObjectStore | ObspecInput, prefetch: int | None = 16384, ) -> TIFF: """Open a new TIFF. diff --git a/python/src/lib.rs b/python/src/lib.rs index 770b052..f3cd219 100644 --- a/python/src/lib.rs +++ b/python/src/lib.rs @@ -4,6 +4,7 @@ mod decoder; mod enums; mod geo; mod ifd; +mod reader; mod thread_pool; mod tiff; mod tile; diff --git a/python/src/reader.rs b/python/src/reader.rs new file mode 100644 index 0000000..73580d4 --- /dev/null +++ b/python/src/reader.rs @@ -0,0 +1,128 @@ +use std::ops::Range; +use std::sync::Arc; + +use async_tiff::error::{AsyncTiffError, AsyncTiffResult}; +use async_tiff::reader::{AsyncFileReader, ObjectReader}; +use bytes::Bytes; +use futures::future::BoxFuture; +use futures::FutureExt; +use pyo3::exceptions::PyTypeError; +use pyo3::intern; +use pyo3::prelude::*; +use pyo3::types::PyDict; +use pyo3_async_runtimes::tokio::into_future; +use pyo3_bytes::PyBytes; +use pyo3_object_store::PyObjectStore; + +#[derive(FromPyObject)] +pub(crate) enum StoreInput { + ObjectStore(PyObjectStore), + ObspecBackend(ObspecBackend), +} + +impl StoreInput { + pub(crate) fn into_async_file_reader(self, path: String) -> Arc { + match self { + Self::ObjectStore(store) => { + Arc::new(ObjectReader::new(store.into_inner(), path.into())) + } + Self::ObspecBackend(backend) => Arc::new(ObspecReader { backend, path }), + } + } +} + +/// A Python backend for making requests that conforms to the GetRangeAsync and GetRangesAsync +/// protocols defined by obspec. +/// https://developmentseed.org/obspec/latest/api/get/#obspec.GetRangeAsync +/// https://developmentseed.org/obspec/latest/api/get/#obspec.GetRangesAsync +#[derive(Debug)] +pub(crate) struct ObspecBackend(PyObject); + +impl ObspecBackend { + async fn get_range(&self, path: &str, range: Range) -> PyResult { + let future = Python::with_gil(|py| { + let kwargs = PyDict::new(py); + kwargs.set_item(intern!(py, "path"), path)?; + kwargs.set_item(intern!(py, "start"), range.start)?; + kwargs.set_item(intern!(py, "end"), range.end)?; + + let coroutine = self + .0 + .call_method(py, intern!(py, "get_range"), (), Some(&kwargs))?; + into_future(coroutine.bind(py).clone()) + })?; + let result = future.await?; + Python::with_gil(|py| result.extract(py)) + } + + async fn get_ranges(&self, path: &str, ranges: &[Range]) -> PyResult> { + let starts = ranges.iter().map(|r| r.start).collect::>(); + let ends = ranges.iter().map(|r| r.end).collect::>(); + + let future = Python::with_gil(|py| { + let kwargs = PyDict::new(py); + kwargs.set_item(intern!(py, "path"), path)?; + kwargs.set_item(intern!(py, "starts"), starts)?; + kwargs.set_item(intern!(py, "ends"), ends)?; + + let coroutine = self + .0 + .call_method(py, intern!(py, "get_range"), (), Some(&kwargs))?; + into_future(coroutine.bind(py).clone()) + })?; + let result = future.await?; + Python::with_gil(|py| result.extract(py)) + } + + async fn get_range_wrapper(&self, path: &str, range: Range) -> AsyncTiffResult { + let result = self + .get_range(path, range) + .await + .map_err(|err| AsyncTiffError::External(Box::new(err)))?; + Ok(result.into_inner()) + } + + async fn get_ranges_wrapper( + &self, + path: &str, + ranges: Vec>, + ) -> AsyncTiffResult> { + let result = self + .get_ranges(path, &ranges) + .await + .map_err(|err| AsyncTiffError::External(Box::new(err)))?; + Ok(result.into_iter().map(|b| b.into_inner()).collect()) + } +} + +impl<'py> FromPyObject<'py> for ObspecBackend { + fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult { + let py = ob.py(); + if ob.hasattr(intern!(py, "get_range_async"))? + && ob.hasattr(intern!(py, "get_ranges_async"))? + { + Ok(Self(ob.clone().unbind())) + } else { + Err(PyTypeError::new_err("Expected obspec-compatible class with `get_range_async` and `get_ranges_async` method.")) + } + } +} + +#[derive(Debug)] +struct ObspecReader { + backend: ObspecBackend, + path: String, +} + +impl AsyncFileReader for ObspecReader { + fn get_bytes(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult> { + self.backend.get_range_wrapper(&self.path, range).boxed() + } + + fn get_byte_ranges( + &self, + ranges: Vec>, + ) -> BoxFuture<'_, AsyncTiffResult>> { + self.backend.get_ranges_wrapper(&self.path, ranges).boxed() + } +} diff --git a/python/src/tiff.rs b/python/src/tiff.rs index cba1041..0462a92 100644 --- a/python/src/tiff.rs +++ b/python/src/tiff.rs @@ -1,13 +1,13 @@ use std::sync::Arc; -use async_tiff::reader::{AsyncFileReader, ObjectReader, PrefetchReader}; +use async_tiff::reader::{AsyncFileReader, PrefetchReader}; use async_tiff::TIFF; use pyo3::exceptions::PyIndexError; use pyo3::prelude::*; use pyo3::types::PyType; use pyo3_async_runtimes::tokio::future_into_py; -use pyo3_object_store::AnyObjectStore; +use crate::reader::StoreInput; use crate::tile::PyTile; use crate::PyImageFileDirectory; @@ -25,25 +25,20 @@ impl PyTIFF { _cls: &'py Bound, py: Python<'py>, path: String, - store: AnyObjectStore, + store: StoreInput, prefetch: Option, ) -> PyResult> { - let reader = ObjectReader::new(store.into_dyn(), path.into()); - let object_reader = reader.clone(); + let reader = store.into_async_file_reader(path); let cog_reader = future_into_py(py, async move { - let reader: Box = if let Some(prefetch) = prefetch { - Box::new( - PrefetchReader::new(Box::new(reader), prefetch) - .await - .unwrap(), - ) + let reader: Arc = if let Some(prefetch) = prefetch { + Arc::new(PrefetchReader::new(reader, prefetch).await.unwrap()) } else { - Box::new(reader) + reader }; Ok(PyTIFF { - tiff: TIFF::try_open(reader).await.unwrap(), - reader: Arc::new(object_reader), + tiff: TIFF::try_open(reader.clone()).await.unwrap(), + reader, }) })?; Ok(cog_reader) diff --git a/python/uv.lock b/python/uv.lock index 4155f39..cd2ead3 100644 --- a/python/uv.lock +++ b/python/uv.lock @@ -26,6 +26,9 @@ wheels = [ [[package]] name = "async-tiff" source = { editable = "." } +dependencies = [ + { name = "obspec" }, +] [package.dev-dependencies] dev = [ @@ -48,6 +51,7 @@ dev = [ ] [package.metadata] +requires-dist = [{ name = "obspec", specifier = ">=0.1.0b3" }] [package.metadata.requires-dev] dev = [ @@ -974,6 +978,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/17/7f/d322a4125405920401450118dbdc52e0384026bd669939484670ce8b2ab9/numpy-2.2.3-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:783145835458e60fa97afac25d511d00a1eca94d4a8f3ace9fe2043003c678e4", size = 12839607 }, ] +[[package]] +name = "obspec" +version = "0.1.0b3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e8/c7/27e2de129314bed5419405d649bdf050569de5ab76236ece881869d7e489/obspec-0.1.0b3.tar.gz", hash = "sha256:085a4b977fc2c4b5ff8431c8c0a7634411390bf8d0d938ad0d8a0732bf28b6e3", size = 90810 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/46/4e/88974dfeff571b661127770f2a5a51ff638f96db4d3d391c044f3885b65c/obspec-0.1.0b3-py3-none-any.whl", hash = "sha256:f43ddfc79198ed614492c56c20e1c2a092d0ac368aef167aa3c6daa3115437e9", size = 15503 }, +] + [[package]] name = "obstore" version = "0.4.0" diff --git a/src/cog.rs b/src/cog.rs index 405507a..e9e3c5b 100644 --- a/src/cog.rs +++ b/src/cog.rs @@ -1,3 +1,5 @@ +use std::sync::Arc; + use crate::error::AsyncTiffResult; use crate::ifd::ImageFileDirectories; use crate::reader::{AsyncCursor, AsyncFileReader}; @@ -13,7 +15,7 @@ impl TIFF { /// Open a new TIFF file. /// /// This will read all the Image File Directories (IFDs) in the file. - pub async fn try_open(reader: Box) -> AsyncTiffResult { + pub async fn try_open(reader: Arc) -> AsyncTiffResult { let mut cursor = AsyncCursor::try_open_tiff(reader).await?; let version = cursor.read_u16().await?; @@ -72,13 +74,13 @@ mod test { let folder = "/Users/kyle/github/developmentseed/async-tiff/"; let path = object_store::path::Path::parse("m_4007307_sw_18_060_20220803.tif").unwrap(); let store = Arc::new(LocalFileSystem::new_with_prefix(folder).unwrap()); - let reader = ObjectReader::new(store, path); + let reader = Arc::new(ObjectReader::new(store, path)); - let cog_reader = TIFF::try_open(Box::new(reader.clone())).await.unwrap(); + let cog_reader = TIFF::try_open(reader.clone()).await.unwrap(); let ifd = &cog_reader.ifds.as_ref()[1]; let decoder_registry = DecoderRegistry::default(); - let tile = ifd.fetch_tile(0, 0, &reader).await.unwrap(); + let tile = ifd.fetch_tile(0, 0, reader.as_ref()).await.unwrap(); let tile = tile.decode(&decoder_registry).unwrap(); std::fs::write("img.buf", tile).unwrap(); } diff --git a/src/error.rs b/src/error.rs index 47e80c5..9f5b7f0 100644 --- a/src/error.rs +++ b/src/error.rs @@ -34,6 +34,10 @@ pub enum AsyncTiffError { /// Reqwest error #[error(transparent)] ReqwestError(#[from] reqwest::Error), + + /// External error + #[error(transparent)] + External(Box), } /// Crate-specific result type. diff --git a/src/reader.rs b/src/reader.rs index 433e3d8..8563077 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -165,13 +165,13 @@ impl AsyncFileReader for ReqwestReader { /// An AsyncFileReader that caches the first `prefetch` bytes of a file. #[derive(Debug)] pub struct PrefetchReader { - reader: Box, + reader: Arc, buffer: Bytes, } impl PrefetchReader { /// Construct a new PrefetchReader, catching the first `prefetch` bytes of the file. - pub async fn new(reader: Box, prefetch: u64) -> AsyncTiffResult { + pub async fn new(reader: Arc, prefetch: u64) -> AsyncTiffResult { let buffer = reader.get_bytes(0..prefetch).await?; Ok(Self { reader, buffer }) } @@ -213,14 +213,14 @@ pub(crate) enum Endianness { // TODO: in the future add buffering to this #[derive(Debug)] pub(crate) struct AsyncCursor { - reader: Box, + reader: Arc, offset: u64, endianness: Endianness, } impl AsyncCursor { /// Create a new AsyncCursor from a reader and endianness. - pub(crate) fn new(reader: Box, endianness: Endianness) -> Self { + pub(crate) fn new(reader: Arc, endianness: Endianness) -> Self { Self { reader, offset: 0, @@ -230,7 +230,7 @@ impl AsyncCursor { /// Create a new AsyncCursor for a TIFF file, automatically inferring endianness from the first /// two bytes. - pub(crate) async fn try_open_tiff(reader: Box) -> AsyncTiffResult { + pub(crate) async fn try_open_tiff(reader: Arc) -> AsyncTiffResult { // Initialize with little endianness and then set later let mut cursor = Self::new(reader, Endianness::LittleEndian); let magic_bytes = cursor.read(2).await?; @@ -252,7 +252,7 @@ impl AsyncCursor { /// Consume self and return the underlying [`AsyncFileReader`]. #[allow(dead_code)] - pub(crate) fn into_inner(self) -> Box { + pub(crate) fn into_inner(self) -> Arc { self.reader } @@ -316,7 +316,7 @@ impl AsyncCursor { } #[allow(dead_code)] - pub(crate) fn reader(&self) -> &dyn AsyncFileReader { + pub(crate) fn reader(&self) -> &Arc { &self.reader } diff --git a/tests/image_tiff/util.rs b/tests/image_tiff/util.rs index 5755040..834ccc7 100644 --- a/tests/image_tiff/util.rs +++ b/tests/image_tiff/util.rs @@ -10,6 +10,6 @@ const TEST_IMAGE_DIR: &str = "tests/image_tiff/images/"; pub(crate) async fn open_tiff(filename: &str) -> TIFF { let store = Arc::new(LocalFileSystem::new_with_prefix(current_dir().unwrap()).unwrap()); let path = format!("{TEST_IMAGE_DIR}/{filename}"); - let reader = Box::new(ObjectReader::new(store.clone(), path.as_str().into())); + let reader = Arc::new(ObjectReader::new(store.clone(), path.as_str().into())); TIFF::try_open(reader).await.unwrap() }