Skip to content
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/api/store/index.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# ObjectStore

::: obstore.store.new_store
::: obstore.store.ObjectStore
35 changes: 35 additions & 0 deletions obstore/python/obstore/store/__init__.pyi
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# TODO: move to reusable types package
from pathlib import Path
from typing import Any, Unpack, overload

from ._aws import S3Config as S3Config
from ._aws import S3Store as S3Store
Expand All @@ -13,6 +14,40 @@ from ._prefix import PrefixStore as PrefixStore
from ._retry import BackoffConfig as BackoffConfig
from ._retry import RetryConfig as RetryConfig

@overload
def new_store(
url: str, *, config: S3Config | None = None, **kwargs: Unpack[S3Config]
) -> PrefixStore: ...
@overload
def new_store(
url: str, *, config: GCSConfig | None = None, **kwargs: Unpack[GCSConfig]
) -> PrefixStore: ...
@overload
def new_store(
url: str, *, config: AzureConfig | None = None, **kwargs: Unpack[AzureConfig]
) -> PrefixStore: ...
def new_store(
url: str, *, config: S3Config | GCSConfig | AzureConfig | None = None, **kwargs: Any
) -> PrefixStore:
"""Easy construction of store by URL, identifying the relevant store.

Supported formats:

- `file:///path/to/my/file` -> [`LocalStore`][obstore.store.LocalStore]
- `memory:///` -> [`MemoryStore`][obstore.store.MemoryStore]
- `s3://bucket/path` -> [`S3Store`][obstore.store.S3Store] (also supports `s3a`)
- `gs://bucket/path` -> [`GCSStore`][obstore.store.GCSStore]
- `az://account/container/path` -> [`AzureStore`][obstore.store.AzureStore] (also supports `adl`, `azure`, `abfs`, `abfss`)
- `http://mydomain/path` -> [`HTTPStore`][obstore.store.HTTPStore]
- `https://mydomain/path` -> [`HTTPStore`][obstore.store.HTTPStore]

There are also special cases for AWS and Azure for `https://{host?}/path` paths:

- `dfs.core.windows.net`, `blob.core.windows.net`, `dfs.fabric.microsoft.com`, `blob.fabric.microsoft.com` -> [`AzureStore`][obstore.store.AzureStore]
- `amazonaws.com` -> [`S3Store`][obstore.store.S3Store]
- `r2.cloudflarestorage.com` -> [`S3Store`][obstore.store.S3Store]
"""

class LocalStore:
"""
Local filesystem storage providing an ObjectStore interface to files on local disk.
Expand Down
4 changes: 3 additions & 1 deletion pyo3-object_store/src/api.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@ use pyo3::prelude::*;

use crate::error::*;
use crate::{
PyAzureStore, PyGCSStore, PyHttpStore, PyLocalStore, PyMemoryStore, PyPrefixStore, PyS3Store,
new_store, PyAzureStore, PyGCSStore, PyHttpStore, PyLocalStore, PyMemoryStore, PyPrefixStore,
PyS3Store,
};

/// Export the default Python API as a submodule named `store` within the given parent module
Expand Down Expand Up @@ -46,6 +47,7 @@ pub fn register_store_module(

let child_module = PyModule::new(parent_module.py(), "store")?;

child_module.add_wrapped(wrap_pyfunction!(new_store))?;
child_module.add_class::<PyAzureStore>()?;
child_module.add_class::<PyGCSStore>()?;
child_module.add_class::<PyHttpStore>()?;
Expand Down
14 changes: 12 additions & 2 deletions pyo3-object_store/src/aws.rs
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,13 @@ impl<'py> FromPyObject<'py> for PyAmazonS3ConfigKey {
}
}

#[derive(Clone, Debug, PartialEq, Eq)]
impl AsRef<str> for PyAmazonS3ConfigKey {
fn as_ref(&self) -> &str {
self.0.as_ref()
}
}

#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct PyAmazonS3Config(HashMap<PyAmazonS3ConfigKey, PyConfigValue>);

impl<'py> FromPyObject<'py> for PyAmazonS3Config {
Expand All @@ -199,6 +205,10 @@ impl PyAmazonS3Config {
builder
}

pub(crate) fn into_inner(self) -> HashMap<PyAmazonS3ConfigKey, PyConfigValue> {
self.0
}

fn merge(mut self, other: PyAmazonS3Config) -> PyObjectStoreResult<PyAmazonS3Config> {
for (k, v) in other.0.into_iter() {
let old_value = self.0.insert(k.clone(), v);
Expand All @@ -215,7 +225,7 @@ impl PyAmazonS3Config {
}
}

fn combine_config_kwargs(
pub(crate) fn combine_config_kwargs(
config: Option<PyAmazonS3Config>,
kwargs: Option<PyAmazonS3Config>,
) -> PyObjectStoreResult<Option<PyAmazonS3Config>> {
Expand Down
14 changes: 12 additions & 2 deletions pyo3-object_store/src/azure.rs
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,13 @@ impl<'py> FromPyObject<'py> for PyAzureConfigKey {
}
}

#[derive(Clone, Debug, PartialEq, Eq)]
impl AsRef<str> for PyAzureConfigKey {
fn as_ref(&self) -> &str {
self.0.as_ref()
}
}

#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct PyAzureConfig(HashMap<PyAzureConfigKey, PyConfigValue>);

impl<'py> FromPyObject<'py> for PyAzureConfig {
Expand All @@ -135,6 +141,10 @@ impl PyAzureConfig {
builder
}

pub(crate) fn into_inner(self) -> HashMap<PyAzureConfigKey, PyConfigValue> {
self.0
}

fn merge(mut self, other: PyAzureConfig) -> PyObjectStoreResult<PyAzureConfig> {
for (k, v) in other.0.into_iter() {
let old_value = self.0.insert(k.clone(), v);
Expand All @@ -151,7 +161,7 @@ impl PyAzureConfig {
}
}

fn combine_config_kwargs(
pub(crate) fn combine_config_kwargs(
config: Option<PyAzureConfig>,
kwargs: Option<PyAzureConfig>,
) -> PyObjectStoreResult<Option<PyAzureConfig>> {
Expand Down
6 changes: 6 additions & 0 deletions pyo3-object_store/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,9 @@ impl<'py> FromPyObject<'py> for PyConfigValue {
}
}
}

impl From<PyConfigValue> for String {
fn from(value: PyConfigValue) -> Self {
value.0
}
}
14 changes: 12 additions & 2 deletions pyo3-object_store/src/gcp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,13 @@ impl<'py> FromPyObject<'py> for PyGoogleConfigKey {
}
}

#[derive(Clone, Debug, PartialEq, Eq)]
impl AsRef<str> for PyGoogleConfigKey {
fn as_ref(&self) -> &str {
self.0.as_ref()
}
}

#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct PyGoogleConfig(HashMap<PyGoogleConfigKey, PyConfigValue>);

impl<'py> FromPyObject<'py> for PyGoogleConfig {
Expand All @@ -135,6 +141,10 @@ impl PyGoogleConfig {
builder
}

pub(crate) fn into_inner(self) -> HashMap<PyGoogleConfigKey, PyConfigValue> {
self.0
}

fn merge(mut self, other: PyGoogleConfig) -> PyObjectStoreResult<PyGoogleConfig> {
for (k, v) in other.0.into_iter() {
let old_value = self.0.insert(k.clone(), v);
Expand All @@ -151,7 +161,7 @@ impl PyGoogleConfig {
}
}

fn combine_config_kwargs(
pub(crate) fn combine_config_kwargs(
config: Option<PyGoogleConfig>,
kwargs: Option<PyGoogleConfig>,
) -> PyObjectStoreResult<Option<PyGoogleConfig>> {
Expand Down
3 changes: 3 additions & 0 deletions pyo3-object_store/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@ mod local;
mod memory;
mod prefix;
mod retry;
mod simple;
mod store;
mod url;

pub use api::{register_exceptions_module, register_store_module};
pub use aws::PyS3Store;
Expand All @@ -25,4 +27,5 @@ pub use http::PyHttpStore;
pub use local::PyLocalStore;
pub use memory::PyMemoryStore;
pub use prefix::PyPrefixStore;
pub use simple::new_store;
pub use store::PyObjectStore;
6 changes: 6 additions & 0 deletions pyo3-object_store/src/prefix.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,12 @@ impl AsRef<Arc<PrefixStore<Arc<dyn ObjectStore>>>> for PyPrefixStore {
}
}

impl From<PrefixStore<Arc<dyn ObjectStore>>> for PyPrefixStore {
fn from(value: PrefixStore<Arc<dyn ObjectStore>>) -> Self {
PyPrefixStore(Arc::new(value))
}
}

#[pymethods]
impl PyPrefixStore {
#[new]
Expand Down
60 changes: 60 additions & 0 deletions pyo3-object_store/src/simple.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
use object_store::prefix::PrefixStore;
use object_store::{parse_url, parse_url_opts, ObjectStoreScheme};
use pyo3::prelude::*;

use crate::error::ObstoreError;
use crate::url::PyUrl;
use crate::{PyObjectStoreResult, PyPrefixStore};

/// Wrapper around [`object_store::parse_url`] for simple construction of stores by url.
// Note: We don't extract the PyObject in the function signature because it's possible that
// AWS/Azure/Google config keys could overlap. And so we don't want to accidentally parse a config
// as an AWS config before knowing that the URL scheme is AWS.
#[pyfunction]
#[pyo3(signature = (url, *, config=None, **kwargs))]
pub fn new_store(
url: PyUrl,
config: Option<Bound<PyAny>>,
kwargs: Option<Bound<PyAny>>,
) -> PyObjectStoreResult<PyPrefixStore> {
let url = url.into_inner();
let (scheme, _) = ObjectStoreScheme::parse(&url).map_err(object_store::Error::from)?;
let (store, path) = match scheme {
ObjectStoreScheme::AmazonS3 => {
let combined = crate::aws::combine_config_kwargs(
config.map(|x| x.extract()).transpose()?,
kwargs.map(|x| x.extract()).transpose()?,
)?
.unwrap_or_default();
parse_url_opts(&url, combined.into_inner())?
}
ObjectStoreScheme::GoogleCloudStorage => {
let combined = crate::gcp::combine_config_kwargs(
config.map(|x| x.extract()).transpose()?,
kwargs.map(|x| x.extract()).transpose()?,
)?
.unwrap_or_default();
parse_url_opts(&url, combined.into_inner())?
}
ObjectStoreScheme::MicrosoftAzure => {
let combined = crate::azure::combine_config_kwargs(
config.map(|x| x.extract()).transpose()?,
kwargs.map(|x| x.extract()).transpose()?,
)?
.unwrap_or_default();
parse_url_opts(&url, combined.into_inner())?
}
scheme => {
if config.is_some() || kwargs.is_some() {
return Err(ObstoreError::new_err(format!(
"Cannot pass config or keyword parameters for scheme {:?}",
scheme,
))
.into());
}
parse_url(&url)?
}
};

Ok(PrefixStore::new(store.into(), path).into())
}
1 change: 0 additions & 1 deletion pyo3-object_store/src/store.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,6 @@ impl<'py> FromPyObject<'py> for PyObjectStore {
return Err(PyValueError::new_err("You must use an object store instance exported from **the same library** as this function. They cannot be used across libraries.\nThis is because object store instances are compiled with a specific version of Rust and Python." ));
}

// TODO: Check for fsspec
Err(PyValueError::new_err(format!(
"Expected an object store instance, got {}",
ob.repr()?
Expand Down
28 changes: 28 additions & 0 deletions pyo3-object_store/src/url.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
use pyo3::exceptions::PyValueError;
use pyo3::pybacked::PyBackedStr;
use pyo3::types::PyAnyMethods;
use pyo3::FromPyObject;
use url::Url;

/// A wrapper around [`url::Url`] that implements [`FromPyObject`].
pub struct PyUrl(Url);

impl PyUrl {
pub(crate) fn into_inner(self) -> Url {
self.0
}
}

impl<'py> FromPyObject<'py> for PyUrl {
fn extract_bound(ob: &pyo3::Bound<'py, pyo3::PyAny>) -> pyo3::PyResult<Self> {
let s = ob.extract::<PyBackedStr>()?;
let url = Url::parse(&s).map_err(|err| PyValueError::new_err(err.to_string()))?;
Ok(Self(url))
}
}

impl AsRef<Url> for PyUrl {
fn as_ref(&self) -> &Url {
&self.0
}
}
58 changes: 58 additions & 0 deletions tests/store/test_from_url.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
from pathlib import Path

import pytest

import obstore as obs
from obstore.exceptions import ObstoreError, UnknownConfigurationKeyError
from obstore.store import new_store


def test_local():
cwd = Path(".").absolute()
url = f"file://{cwd}"
store = new_store(url)
cwd_files = obs.list(store).collect()
assert any(file["path"] == "test_from_url.py" for file in cwd_files)


def test_memory():
url = "memory:///"
_store = new_store(url)

with pytest.raises(ObstoreError):
new_store(url, aws_access_key_id="test")


def test_s3_params():
new_store(
"s3://bucket/path",
access_key_id="access_key_id",
secret_access_key="secret_access_key",
)

with pytest.raises(UnknownConfigurationKeyError):
new_store("s3://bucket/path", azure_authority_id="")


def test_gcs_params():
# Just to test the params. In practice, the bucket shouldn't be passed
new_store("gs://test.example.com/path", google_bucket="test_bucket")

with pytest.raises(UnknownConfigurationKeyError):
new_store("gs://test.example.com/path", azure_authority_id="")


def test_azure_params():
url = "abfs://container@account.dfs.core.windows.net/path"
new_store(url, azure_skip_signature=True)

with pytest.raises(UnknownConfigurationKeyError):
new_store(url, aws_bucket="test")


def test_http():
url = "https://mydomain/path"
new_store(url)

with pytest.raises(ObstoreError):
new_store(url, aws_bucket="test")
12 changes: 11 additions & 1 deletion tests/store/test_s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import obstore as obs
from obstore.exceptions import ObstoreError
from obstore.store import S3Store
from obstore.store import S3Store, new_store


@pytest.mark.asyncio
Expand Down Expand Up @@ -33,3 +33,13 @@ def test_error_overlapping_config_kwargs():

with pytest.raises(ObstoreError, match="Duplicate key"):
S3Store("bucket", config={"AWS_SKIP_SIGNATURE": True}, skip_signature=True)


@pytest.mark.asyncio
async def test_new_store():
store = new_store(
"s3://ookla-open-data/parquet/performance/type=fixed/year=2024/quarter=1",
region="us-west-2",
skip_signature=True,
)
_meta = await obs.head_async(store, "2024-01-01_performance_fixed_tiles.parquet")
Loading