Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/api/store/index.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# ObjectStore

::: obstore.store.from_url
::: obstore.store.ObjectStore
78 changes: 78 additions & 0 deletions obstore/python/obstore/store/__init__.pyi
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# TODO: move to reusable types package
from pathlib import Path
from typing import Any, Unpack, overload

from ._aws import S3Config as S3Config
from ._aws import S3Store as S3Store
Expand All @@ -12,6 +13,83 @@ from ._http import HTTPStore as HTTPStore
from ._retry import BackoffConfig as BackoffConfig
from ._retry import RetryConfig as RetryConfig

# Overload: `config` and `**kwargs` are typed with the keys of `S3Config`.
@overload
def from_url(
    url: str,
    *,
    config: S3Config | None = None,
    client_options: ClientConfig | None = None,
    retry_config: RetryConfig | None = None,
    **kwargs: Unpack[S3Config],
) -> ObjectStore: ...
# Overload: `config` and `**kwargs` are typed with the keys of `GCSConfig`.
@overload
def from_url(
    url: str,
    *,
    config: GCSConfig | None = None,
    client_options: ClientConfig | None = None,
    retry_config: RetryConfig | None = None,
    **kwargs: Unpack[GCSConfig],
) -> ObjectStore: ...
# Overload: `config` and `**kwargs` are typed with the keys of `AzureConfig`.
@overload
def from_url(
    url: str,
    *,
    config: AzureConfig | None = None,
    client_options: ClientConfig | None = None,
    retry_config: RetryConfig | None = None,
    **kwargs: Unpack[AzureConfig],
) -> ObjectStore: ...
# Overload: no config, client or retry options; only `automatic_cleanup` and `mkdir`,
# the same keyword arguments that `LocalStore.from_url` accepts.
@overload
def from_url(
    url: str,
    *,
    config: None = None,
    client_options: None = None,
    retry_config: None = None,
    automatic_cleanup: bool = False,
    mkdir: bool = False,
) -> ObjectStore: ...
def from_url(
    url: str,
    *,
    config: S3Config | GCSConfig | AzureConfig | None = None,
    client_options: ClientConfig | None = None,
    retry_config: RetryConfig | None = None,
    **kwargs: Any,
) -> ObjectStore:
    """Construct a store from a URL, identifying the relevant store from the URL.

    This will defer to a store-specific `from_url` constructor based on the provided
    `url`. E.g. passing `"s3://bucket/path"` will defer to
    [`S3Store.from_url`][obstore.store.S3Store.from_url].

    Supported formats:

    - `file:///path/to/my/file` -> [`LocalStore`][obstore.store.LocalStore]
    - `memory:///` -> [`MemoryStore`][obstore.store.MemoryStore]
    - `s3://bucket/path` -> [`S3Store`][obstore.store.S3Store] (also supports `s3a`)
    - `gs://bucket/path` -> [`GCSStore`][obstore.store.GCSStore]
    - `az://account/container/path` -> [`AzureStore`][obstore.store.AzureStore] (also supports `adl`, `azure`, `abfs`, `abfss`)
    - `http://mydomain/path` -> [`HTTPStore`][obstore.store.HTTPStore]
    - `https://mydomain/path` -> [`HTTPStore`][obstore.store.HTTPStore]

    There are also special cases for AWS and Azure for `https://{host?}/path` paths:

    - `dfs.core.windows.net`, `blob.core.windows.net`, `dfs.fabric.microsoft.com`, `blob.fabric.microsoft.com` -> [`AzureStore`][obstore.store.AzureStore]
    - `amazonaws.com` -> [`S3Store`][obstore.store.S3Store]
    - `r2.cloudflarestorage.com` -> [`S3Store`][obstore.store.S3Store]

    Args:
        url: well-known storage URL.

    Keyword Args:
        config: per-store configuration. Values in this config will override values inferred from the url. Defaults to None.
        client_options: HTTP client options. Defaults to None.
        retry_config: Retry configuration. Defaults to None.
        kwargs: additional store-specific options given as keyword arguments. The overloads type these as the keys of `S3Config`, `GCSConfig` or `AzureConfig`, or as `automatic_cleanup` / `mkdir`.

    Returns:
        An `ObjectStore`; which concrete store is constructed depends on `url`.

    """

class LocalStore:
"""
Local filesystem storage providing an ObjectStore interface to files on local disk.
Expand Down
23 changes: 5 additions & 18 deletions obstore/src/signer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,9 @@ use pyo3::exceptions::PyValueError;
use pyo3::intern;
use pyo3::prelude::*;
use pyo3::pybacked::PyBackedStr;
use pyo3::types::PyString;
use pyo3_object_store::{
MaybePrefixedStore, PyAzureStore, PyGCSStore, PyObjectStoreError, PyObjectStoreResult,
PyS3Store,
PyS3Store, PyUrl,
};
use url::Url;

Expand Down Expand Up @@ -139,18 +138,6 @@ impl<'py> FromPyObject<'py> for PyMethod {
}
}

pub(crate) struct PyUrl(url::Url);

impl<'py> IntoPyObject<'py> for PyUrl {
type Target = PyString;
type Output = Bound<'py, PyString>;
type Error = std::convert::Infallible;

fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
String::from(self.0).into_pyobject(py)
}
}

/// Collection of [`PyUrl`] values produced when several paths are signed at once.
///
/// Handed back to Python through the derived `IntoPyObject` implementation.
#[derive(IntoPyObject)]
pub(crate) struct PyUrls(Vec<PyUrl>);

Expand All @@ -174,12 +161,12 @@ pub(crate) fn sign(
py.allow_threads(|| match paths {
PyPaths::One(path) => {
let url = runtime.block_on(store.signed_url(method, &path, expires_in))?;
Ok(PySignResult::One(PyUrl(url)))
Ok(PySignResult::One(PyUrl::new(url)))
}
PyPaths::Many(paths) => {
let urls = runtime.block_on(store.signed_urls(method, &paths, expires_in))?;
Ok(PySignResult::Many(PyUrls(
urls.into_iter().map(PyUrl).collect(),
urls.into_iter().map(PyUrl::new).collect(),
)))
}
})
Expand All @@ -201,15 +188,15 @@ pub(crate) fn sign_async(
.signed_url(method, &path, expires_in)
.await
.map_err(PyObjectStoreError::ObjectStoreError)?;
Ok(PySignResult::One(PyUrl(url)))
Ok(PySignResult::One(PyUrl::new(url)))
}
PyPaths::Many(paths) => {
let urls = store
.signed_urls(method, &paths, expires_in)
.await
.map_err(PyObjectStoreError::ObjectStoreError)?;
Ok(PySignResult::Many(PyUrls(
urls.into_iter().map(PyUrl).collect(),
urls.into_iter().map(PyUrl::new).collect(),
)))
}
}
Expand Down
5 changes: 4 additions & 1 deletion pyo3-object_store/src/api.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@ use pyo3::intern;
use pyo3::prelude::*;

use crate::error::*;
use crate::{PyAzureStore, PyGCSStore, PyHttpStore, PyLocalStore, PyMemoryStore, PyS3Store};
use crate::{
from_url, PyAzureStore, PyGCSStore, PyHttpStore, PyLocalStore, PyMemoryStore, PyS3Store,
};

/// Export the default Python API as a submodule named `store` within the given parent module
///
Expand Down Expand Up @@ -44,6 +46,7 @@ pub fn register_store_module(

let child_module = PyModule::new(parent_module.py(), "store")?;

child_module.add_wrapped(wrap_pyfunction!(from_url))?;
child_module.add_class::<PyAzureStore>()?;
child_module.add_class::<PyGCSStore>()?;
child_module.add_class::<PyHttpStore>()?;
Expand Down
18 changes: 16 additions & 2 deletions pyo3-object_store/src/aws.rs
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@ impl PyS3Store {

#[classmethod]
#[pyo3(signature = (url, *, config=None, client_options=None, retry_config=None, **kwargs))]
fn from_url(
pub(crate) fn from_url(
_cls: &Bound<PyType>,
url: PyUrl,
config: Option<PyAmazonS3Config>,
Expand Down Expand Up @@ -274,6 +274,12 @@ impl<'py> FromPyObject<'py> for PyAmazonS3ConfigKey {
}
}

// Exposes the wrapped S3 configuration key as a string slice by delegating to the
// inner key's own `as_ref`.
impl AsRef<str> for PyAmazonS3ConfigKey {
    fn as_ref(&self) -> &str {
        let key = &self.0;
        key.as_ref()
    }
}

impl<'py> IntoPyObject<'py> for PyAmazonS3ConfigKey {
type Target = PyString;
type Output = Bound<'py, PyString>;
Expand All @@ -284,9 +290,17 @@ impl<'py> IntoPyObject<'py> for PyAmazonS3ConfigKey {
}
}

#[derive(Clone, Debug, Default, PartialEq, Eq, FromPyObject, IntoPyObject)]
#[derive(Clone, Debug, Default, PartialEq, Eq, IntoPyObject)]
pub struct PyAmazonS3Config(HashMap<PyAmazonS3ConfigKey, PyConfigValue>);

// `FromPyObject` is written by hand rather than derived: a derived impl would report
// an invalid config key as a `TypeError`, whereas delegating straight to the inner
// map's extraction lets the `UnknownConfigurationKeyError` propagate unchanged.
impl<'py> FromPyObject<'py> for PyAmazonS3Config {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        ob.extract().map(Self)
    }
}

impl PyAmazonS3Config {
fn apply_config(self, mut builder: AmazonS3Builder) -> AmazonS3Builder {
for (key, value) in self.0.into_iter() {
Expand Down
18 changes: 16 additions & 2 deletions pyo3-object_store/src/azure.rs
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ impl PyAzureStore {

#[classmethod]
#[pyo3(signature = (url, *, config=None, client_options=None, retry_config=None, **kwargs))]
fn from_url(
pub(crate) fn from_url(
_cls: &Bound<PyType>,
url: PyUrl,
config: Option<PyAzureConfig>,
Expand Down Expand Up @@ -199,6 +199,12 @@ impl<'py> FromPyObject<'py> for PyAzureConfigKey {
}
}

// Exposes the wrapped Azure configuration key as a string slice by delegating to the
// inner key's own `as_ref`.
impl AsRef<str> for PyAzureConfigKey {
    fn as_ref(&self) -> &str {
        let key = &self.0;
        key.as_ref()
    }
}

impl<'py> IntoPyObject<'py> for PyAzureConfigKey {
type Target = PyString;
type Output = Bound<'py, PyString>;
Expand All @@ -209,9 +215,17 @@ impl<'py> IntoPyObject<'py> for PyAzureConfigKey {
}
}

#[derive(Clone, Debug, PartialEq, Eq, FromPyObject, IntoPyObject)]
#[derive(Clone, Debug, PartialEq, Eq, IntoPyObject)]
pub struct PyAzureConfig(HashMap<PyAzureConfigKey, PyConfigValue>);

// `FromPyObject` is written by hand rather than derived: a derived impl would report
// an invalid config key as a `TypeError`, whereas delegating straight to the inner
// map's extraction lets the `UnknownConfigurationKeyError` propagate unchanged.
impl<'py> FromPyObject<'py> for PyAzureConfig {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        ob.extract().map(Self)
    }
}

impl PyAzureConfig {
fn apply_config(self, mut builder: MicrosoftAzureBuilder) -> MicrosoftAzureBuilder {
for (key, value) in self.0.into_iter() {
Expand Down
6 changes: 6 additions & 0 deletions pyo3-object_store/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,9 @@ impl<'py> FromPyObject<'py> for PyConfigValue {
}
}
}

impl From<PyConfigValue> for String {
fn from(value: PyConfigValue) -> Self {
value.0
}
}
18 changes: 16 additions & 2 deletions pyo3-object_store/src/gcp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ impl PyGCSStore {

#[classmethod]
#[pyo3(signature = (url, *, config=None, client_options=None, retry_config=None, **kwargs))]
fn from_url(
pub(crate) fn from_url(
_cls: &Bound<PyType>,
url: PyUrl,
config: Option<PyGoogleConfig>,
Expand Down Expand Up @@ -197,6 +197,12 @@ impl<'py> FromPyObject<'py> for PyGoogleConfigKey {
}
}

// Exposes the wrapped Google configuration key as a string slice by delegating to the
// inner key's own `as_ref`.
impl AsRef<str> for PyGoogleConfigKey {
    fn as_ref(&self) -> &str {
        let key = &self.0;
        key.as_ref()
    }
}

impl<'py> IntoPyObject<'py> for PyGoogleConfigKey {
type Target = PyString;
type Output = Bound<'py, PyString>;
Expand All @@ -207,9 +213,17 @@ impl<'py> IntoPyObject<'py> for PyGoogleConfigKey {
}
}

#[derive(Clone, Debug, PartialEq, Eq, FromPyObject, IntoPyObject)]
#[derive(Clone, Debug, PartialEq, Eq, IntoPyObject)]
pub struct PyGoogleConfig(HashMap<PyGoogleConfigKey, PyConfigValue>);

// `FromPyObject` is written by hand rather than derived: a derived impl would report
// an invalid config key as a `TypeError`, whereas delegating straight to the inner
// map's extraction lets the `UnknownConfigurationKeyError` propagate unchanged.
impl<'py> FromPyObject<'py> for PyGoogleConfig {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        ob.extract().map(Self)
    }
}

impl PyGoogleConfig {
fn apply_config(self, mut builder: GoogleCloudStorageBuilder) -> GoogleCloudStorageBuilder {
for (key, value) in self.0.into_iter() {
Expand Down
12 changes: 6 additions & 6 deletions pyo3-object_store/src/http.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,10 @@ use pyo3::{intern, IntoPyObjectExt};

use crate::error::PyObjectStoreResult;
use crate::retry::PyRetryConfig;
use crate::PyClientOptions;
use crate::{PyClientOptions, PyUrl};

struct HTTPConfig {
url: String,
url: PyUrl,
client_options: Option<PyClientOptions>,
retry_config: Option<PyRetryConfig>,
}
Expand Down Expand Up @@ -59,7 +59,7 @@ impl PyHttpStore {
#[new]
#[pyo3(signature = (url, *, client_options=None, retry_config=None))]
fn new(
url: String,
url: PyUrl,
client_options: Option<PyClientOptions>,
retry_config: Option<PyRetryConfig>,
) -> PyObjectStoreResult<Self> {
Expand All @@ -82,9 +82,9 @@ impl PyHttpStore {

#[classmethod]
#[pyo3(signature = (url, *, client_options=None, retry_config=None))]
fn from_url(
pub(crate) fn from_url(
_cls: &Bound<PyType>,
url: String,
url: PyUrl,
client_options: Option<PyClientOptions>,
retry_config: Option<PyRetryConfig>,
) -> PyObjectStoreResult<Self> {
Expand All @@ -96,6 +96,6 @@ impl PyHttpStore {
}

fn __repr__(&self) -> String {
format!("HTTPStore(\"{}\")", &self.config.url)
format!("HTTPStore(\"{}\")", &self.config.url.as_ref())
}
}
2 changes: 2 additions & 0 deletions pyo3-object_store/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ mod memory;
mod path;
mod prefix;
mod retry;
mod simple;
mod store;
mod url;

Expand All @@ -27,5 +28,6 @@ pub use http::PyHttpStore;
pub use local::PyLocalStore;
pub use memory::PyMemoryStore;
pub use prefix::MaybePrefixedStore;
pub use simple::from_url;
pub use store::PyObjectStore;
pub use url::PyUrl;
8 changes: 4 additions & 4 deletions pyo3-object_store/src/local.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@ use pyo3::exceptions::PyValueError;
use pyo3::prelude::*;
use pyo3::types::{PyDict, PyTuple, PyType};
use pyo3::{intern, IntoPyObjectExt};
use url::Url;

use crate::error::PyObjectStoreResult;
use crate::PyUrl;

#[derive(Clone, Debug)]
struct LocalConfig {
Expand Down Expand Up @@ -79,13 +79,13 @@ impl PyLocalStore {

#[classmethod]
#[pyo3(signature = (url, *, automatic_cleanup=false, mkdir=false))]
fn from_url(
pub(crate) fn from_url(
_cls: &Bound<PyType>,
url: &str,
url: PyUrl,
automatic_cleanup: bool,
mkdir: bool,
) -> PyObjectStoreResult<Self> {
let url = Url::parse(url).map_err(|err| PyValueError::new_err(err.to_string()))?;
let url = url.into_inner();
let (scheme, path) = ObjectStoreScheme::parse(&url).map_err(object_store::Error::from)?;

if !matches!(scheme, ObjectStoreScheme::Local) {
Expand Down
Loading