Skip to content

Commit fbd8719

Browse files
authored
fix!: Don't percent-encode paths (#524)
* fix: Support percent-encoded paths * lint
1 parent cafb35a commit fbd8719

File tree

8 files changed

+70
-39
lines changed

8 files changed

+70
-39
lines changed

obstore/src/buffered.rs

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ use pyo3::types::PyString;
1010
use pyo3::{intern, IntoPyObjectExt};
1111
use pyo3_async_runtimes::tokio::{future_into_py, get_runtime};
1212
use pyo3_bytes::PyBytes;
13-
use pyo3_object_store::{PyObjectStore, PyObjectStoreError, PyObjectStoreResult};
13+
use pyo3_object_store::{PyObjectStore, PyObjectStoreError, PyObjectStoreResult, PyPath};
1414
use tokio::io::{AsyncBufReadExt, AsyncReadExt, AsyncSeekExt, AsyncWriteExt, Lines};
1515
use tokio::sync::Mutex;
1616

@@ -23,7 +23,7 @@ use crate::tags::PyTagSet;
2323
pub(crate) fn open_reader(
2424
py: Python,
2525
store: PyObjectStore,
26-
path: String,
26+
path: PyPath,
2727
buffer_size: usize,
2828
) -> PyObjectStoreResult<PyReadableFile> {
2929
let store = store.into_inner();
@@ -38,7 +38,7 @@ pub(crate) fn open_reader(
3838
pub(crate) fn open_reader_async(
3939
py: Python,
4040
store: PyObjectStore,
41-
path: String,
41+
path: PyPath,
4242
buffer_size: usize,
4343
) -> PyResult<Bound<PyAny>> {
4444
let store = store.into_inner();
@@ -50,11 +50,11 @@ pub(crate) fn open_reader_async(
5050

5151
async fn create_reader(
5252
store: Arc<dyn ObjectStore>,
53-
path: String,
53+
path: PyPath,
5454
capacity: usize,
5555
) -> PyObjectStoreResult<(BufReader, ObjectMeta)> {
5656
let meta = store
57-
.head(&path.into())
57+
.head(path.as_ref())
5858
.await
5959
.map_err(PyObjectStoreError::ObjectStoreError)?;
6060
Ok((BufReader::with_capacity(store, &meta, capacity), meta))
@@ -286,7 +286,7 @@ async fn next_line(reader: Arc<Mutex<Lines<BufReader>>>, r#async: bool) -> PyRes
286286
#[pyo3(signature = (store, path, *, attributes=None, buffer_size=10 * 1024 * 1024, tags=None, max_concurrency=12))]
287287
pub(crate) fn open_writer(
288288
store: PyObjectStore,
289-
path: String,
289+
path: PyPath,
290290
attributes: Option<PyAttributes>,
291291
buffer_size: usize,
292292
tags: Option<PyTagSet>,
@@ -302,7 +302,7 @@ pub(crate) fn open_writer(
302302
#[pyo3(signature = (store, path, *, attributes=None, buffer_size=10 * 1024 * 1024, tags=None, max_concurrency=12))]
303303
pub(crate) fn open_writer_async(
304304
store: PyObjectStore,
305-
path: String,
305+
path: PyPath,
306306
attributes: Option<PyAttributes>,
307307
buffer_size: usize,
308308
tags: Option<PyTagSet>,
@@ -316,7 +316,7 @@ pub(crate) fn open_writer_async(
316316

317317
fn create_writer(
318318
store: PyObjectStore,
319-
path: String,
319+
path: PyPath,
320320
attributes: Option<PyAttributes>,
321321
capacity: usize,
322322
tags: Option<PyTagSet>,

obstore/src/get.rs

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ use pyo3::exceptions::{PyStopAsyncIteration, PyStopIteration, PyValueError};
1111
use pyo3::prelude::*;
1212
use pyo3_async_runtimes::tokio::get_runtime;
1313
use pyo3_bytes::PyBytes;
14-
use pyo3_object_store::{PyObjectStore, PyObjectStoreError, PyObjectStoreResult};
14+
use pyo3_object_store::{PyObjectStore, PyObjectStoreError, PyObjectStoreResult, PyPath};
1515
use tokio::sync::Mutex;
1616

1717
use crate::attributes::PyAttributes;
@@ -334,12 +334,12 @@ impl<'py> IntoPyObject<'py> for PyBytesWrapper {
334334
pub(crate) fn get(
335335
py: Python,
336336
store: PyObjectStore,
337-
path: String,
337+
path: PyPath,
338338
options: Option<PyGetOptions>,
339339
) -> PyObjectStoreResult<PyGetResult> {
340340
let runtime = get_runtime();
341341
py.allow_threads(|| {
342-
let path = &path.into();
342+
let path = &path.as_ref();
343343
let fut = if let Some(options) = options {
344344
store.as_ref().get_opts(path, options.into())
345345
} else {
@@ -355,11 +355,11 @@ pub(crate) fn get(
355355
pub(crate) fn get_async(
356356
py: Python,
357357
store: PyObjectStore,
358-
path: String,
358+
path: PyPath,
359359
options: Option<PyGetOptions>,
360360
) -> PyResult<Bound<PyAny>> {
361361
pyo3_async_runtimes::tokio::future_into_py(py, async move {
362-
let path = &path.into();
362+
let path = &path.as_ref();
363363
let fut = if let Some(options) = options {
364364
store.as_ref().get_opts(path, options.into())
365365
} else {
@@ -375,15 +375,15 @@ pub(crate) fn get_async(
375375
pub(crate) fn get_range(
376376
py: Python,
377377
store: PyObjectStore,
378-
path: String,
378+
path: PyPath,
379379
start: u64,
380380
end: Option<u64>,
381381
length: Option<u64>,
382382
) -> PyObjectStoreResult<pyo3_bytes::PyBytes> {
383383
let runtime = get_runtime();
384384
let range = params_to_range(start, end, length)?;
385385
py.allow_threads(|| {
386-
let out = runtime.block_on(store.as_ref().get_range(&path.into(), range))?;
386+
let out = runtime.block_on(store.as_ref().get_range(path.as_ref(), range))?;
387387
Ok::<_, PyObjectStoreError>(pyo3_bytes::PyBytes::new(out))
388388
})
389389
}
@@ -393,7 +393,7 @@ pub(crate) fn get_range(
393393
pub(crate) fn get_range_async(
394394
py: Python,
395395
store: PyObjectStore,
396-
path: String,
396+
path: PyPath,
397397
start: u64,
398398
end: Option<u64>,
399399
length: Option<u64>,
@@ -402,7 +402,7 @@ pub(crate) fn get_range_async(
402402
pyo3_async_runtimes::tokio::future_into_py(py, async move {
403403
let out = store
404404
.as_ref()
405-
.get_range(&path.into(), range)
405+
.get_range(path.as_ref(), range)
406406
.await
407407
.map_err(PyObjectStoreError::ObjectStoreError)?;
408408
Ok(pyo3_bytes::PyBytes::new(out))
@@ -429,15 +429,15 @@ fn params_to_range(
429429
pub(crate) fn get_ranges(
430430
py: Python,
431431
store: PyObjectStore,
432-
path: String,
432+
path: PyPath,
433433
starts: Vec<u64>,
434434
ends: Option<Vec<u64>>,
435435
lengths: Option<Vec<u64>>,
436436
) -> PyObjectStoreResult<Vec<pyo3_bytes::PyBytes>> {
437437
let runtime = get_runtime();
438438
let ranges = params_to_ranges(starts, ends, lengths)?;
439439
py.allow_threads(|| {
440-
let out = runtime.block_on(store.as_ref().get_ranges(&path.into(), &ranges))?;
440+
let out = runtime.block_on(store.as_ref().get_ranges(path.as_ref(), &ranges))?;
441441
Ok::<_, PyObjectStoreError>(out.into_iter().map(|buf| buf.into()).collect())
442442
})
443443
}
@@ -447,7 +447,7 @@ pub(crate) fn get_ranges(
447447
pub(crate) fn get_ranges_async(
448448
py: Python,
449449
store: PyObjectStore,
450-
path: String,
450+
path: PyPath,
451451
starts: Vec<u64>,
452452
ends: Option<Vec<u64>>,
453453
lengths: Option<Vec<u64>>,
@@ -456,7 +456,7 @@ pub(crate) fn get_ranges_async(
456456
pyo3_async_runtimes::tokio::future_into_py(py, async move {
457457
let out = store
458458
.as_ref()
459-
.get_ranges(&path.into(), &ranges)
459+
.get_ranges(path.as_ref(), &ranges)
460460
.await
461461
.map_err(PyObjectStoreError::ObjectStoreError)?;
462462
Ok(out

obstore/src/head.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,26 @@
11
use pyo3::prelude::*;
22
use pyo3_async_runtimes::tokio::get_runtime;
3-
use pyo3_object_store::{PyObjectStore, PyObjectStoreError, PyObjectStoreResult};
3+
use pyo3_object_store::{PyObjectStore, PyObjectStoreError, PyObjectStoreResult, PyPath};
44

55
use crate::list::PyObjectMeta;
66

77
#[pyfunction]
8-
pub fn head(py: Python, store: PyObjectStore, path: String) -> PyObjectStoreResult<PyObjectMeta> {
8+
pub fn head(py: Python, store: PyObjectStore, path: PyPath) -> PyObjectStoreResult<PyObjectMeta> {
99
let runtime = get_runtime();
1010
let store = store.into_inner();
1111

1212
py.allow_threads(|| {
13-
let meta = runtime.block_on(store.head(&path.into()))?;
13+
let meta = runtime.block_on(store.head(path.as_ref()))?;
1414
Ok::<_, PyObjectStoreError>(PyObjectMeta::new(meta))
1515
})
1616
}
1717

1818
#[pyfunction]
19-
pub fn head_async(py: Python, store: PyObjectStore, path: String) -> PyResult<Bound<PyAny>> {
19+
pub fn head_async(py: Python, store: PyObjectStore, path: PyPath) -> PyResult<Bound<PyAny>> {
2020
let store = store.into_inner().clone();
2121
pyo3_async_runtimes::tokio::future_into_py(py, async move {
2222
let meta = store
23-
.head(&path.into())
23+
.head(path.as_ref())
2424
.await
2525
.map_err(PyObjectStoreError::ObjectStoreError)?;
2626
Ok(PyObjectMeta::new(meta))

obstore/src/path.rs

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
use object_store::path::Path;
22
use pyo3::exceptions::PyTypeError;
33
use pyo3::prelude::*;
4+
use pyo3_object_store::PyPath;
45

56
pub(crate) enum PyPaths {
67
One(Path),
@@ -10,11 +11,11 @@ pub(crate) enum PyPaths {
1011

1112
impl<'py> FromPyObject<'py> for PyPaths {
1213
fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
13-
if let Ok(path) = ob.extract::<String>() {
14-
Ok(Self::One(path.into()))
15-
} else if let Ok(paths) = ob.extract::<Vec<String>>() {
14+
if let Ok(path) = ob.extract::<PyPath>() {
15+
Ok(Self::One(path.into_inner()))
16+
} else if let Ok(paths) = ob.extract::<Vec<PyPath>>() {
1617
Ok(Self::Many(
17-
paths.into_iter().map(|path| path.into()).collect(),
18+
paths.into_iter().map(|path| path.into_inner()).collect(),
1819
))
1920
} else {
2021
Err(PyTypeError::new_err(

obstore/src/put.rs

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ use pyo3::{intern, IntoPyObjectExt};
1919
use pyo3_async_runtimes::tokio::get_runtime;
2020
use pyo3_bytes::PyBytes;
2121
use pyo3_file::PyFileLikeObject;
22-
use pyo3_object_store::{PyObjectStore, PyObjectStoreResult};
22+
use pyo3_object_store::{PyObjectStore, PyObjectStoreResult, PyPath};
2323

2424
use crate::attributes::PyAttributes;
2525
use crate::tags::PyTagSet;
@@ -295,7 +295,7 @@ impl<'py> IntoPyObject<'py> for PyPutResult {
295295
pub(crate) fn put(
296296
py: Python,
297297
store: PyObjectStore,
298-
path: String,
298+
path: PyPath,
299299
mut file: PutInput,
300300
attributes: Option<PyAttributes>,
301301
tags: Option<PyTagSet>,
@@ -328,7 +328,7 @@ pub(crate) fn put(
328328
if use_multipart {
329329
runtime.block_on(put_multipart_inner(
330330
store.into_inner(),
331-
&path.into(),
331+
path.as_ref(),
332332
file,
333333
chunk_size,
334334
max_concurrency,
@@ -338,7 +338,7 @@ pub(crate) fn put(
338338
} else {
339339
runtime.block_on(put_inner(
340340
store.into_inner(),
341-
&path.into(),
341+
path.as_ref(),
342342
file,
343343
attributes,
344344
tags,
@@ -354,7 +354,7 @@ pub(crate) fn put(
354354
pub(crate) fn put_async(
355355
py: Python,
356356
store: PyObjectStore,
357-
path: String,
357+
path: PyPath,
358358
mut file: PutInput,
359359
attributes: Option<PyAttributes>,
360360
tags: Option<PyTagSet>,
@@ -380,7 +380,7 @@ pub(crate) fn put_async(
380380
let result = if use_multipart {
381381
put_multipart_inner(
382382
store.into_inner(),
383-
&path.into(),
383+
path.as_ref(),
384384
file,
385385
chunk_size,
386386
max_concurrency,
@@ -391,7 +391,7 @@ pub(crate) fn put_async(
391391
} else {
392392
put_inner(
393393
store.into_inner(),
394-
&path.into(),
394+
path.as_ref(),
395395
file,
396396
attributes,
397397
tags,

pyo3-object_store/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ pub use gcp::PyGCSStore;
2828
pub use http::PyHttpStore;
2929
pub use local::PyLocalStore;
3030
pub use memory::PyMemoryStore;
31+
pub use path::PyPath;
3132
pub use prefix::MaybePrefixedStore;
3233
pub use simple::from_url;
3334
pub use store::{AnyObjectStore, PyExternalObjectStore, PyObjectStore};

pyo3-object_store/src/path.rs

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,25 @@
11
use object_store::path::Path;
2+
use pyo3::exceptions::PyValueError;
23
use pyo3::prelude::*;
4+
use pyo3::pybacked::PyBackedStr;
35
use pyo3::types::PyString;
46

7+
/// A Python-facing wrapper around a [`Path`].
58
#[derive(Clone, Debug, Default, PartialEq)]
6-
pub(crate) struct PyPath(Path);
9+
pub struct PyPath(Path);
710

811
impl<'py> FromPyObject<'py> for PyPath {
912
fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
10-
Ok(Self(ob.extract::<String>()?.into()))
13+
let path = Path::parse(ob.extract::<PyBackedStr>()?)
14+
.map_err(|err| PyValueError::new_err(format!("Could not parse path: {err}")))?;
15+
Ok(Self(path))
16+
}
17+
}
18+
19+
impl PyPath {
20+
/// Consume self and return the underlying [`Path`].
21+
pub fn into_inner(self) -> Path {
22+
self.0
1123
}
1224
}
1325

tests/store/test_local.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,3 +78,20 @@ def test_eq():
7878
assert store == store # noqa: PLR0124
7979
assert store == store2
8080
assert store != store3
81+
82+
83+
def test_local_store_percent_encoded(tmp_path: Path):
84+
fname1 = "hello%20world.txt"
85+
content1 = b"Hello, World!"
86+
with (tmp_path / fname1).open("wb") as f:
87+
f.write(content1)
88+
89+
store = LocalStore(tmp_path)
90+
assert store.get(fname1).bytes() == content1
91+
92+
fname2 = "hello world.txt"
93+
content2 = b"Hello, World! (with spaces)"
94+
with (tmp_path / fname2).open("wb") as f:
95+
f.write(content2)
96+
97+
assert store.get(fname2).bytes() == content2

0 commit comments

Comments
 (0)