Skip to content

Commit 486eb2e

Browse files
Make VFS accept path-like objects to refer to files. (#1818)
While working on a change in tiledbsoma [1], we ran into somewhat mysterious error messages in code that had previously worked. The cause was that, unlike the built-in `open` (and other libraries that accept path-likes), `tiledb.VFS` only accepted `str` (and, undocumented, `bytes`) paths. Accepting path-like objects (such as `pathlib.Path` instances) lets us play better with other libraries and user code.

[1]: single-cell-data/TileDB-SOMA#1629
1 parent 5824126 commit 486eb2e

File tree

2 files changed

+59
-43
lines changed

2 files changed

+59
-43
lines changed

tiledb/tests/test_vfs.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import io
22
import os
3+
import pathlib
34
import random
45
import sys
56

@@ -135,11 +136,11 @@ def test_write_read(self):
135136
vfs = tiledb.VFS()
136137

137138
buffer = b"bar"
138-
fio = vfs.open(self.path("foo"), "wb")
139+
fio = vfs.open(pathlib.Path(self.path("foo")), "wb")
139140
fio.write(buffer)
140141
self.assertEqual(vfs.file_size(self.path("foo")), 3)
141142

142-
fio = vfs.open(self.path("foo"), "rb")
143+
fio = vfs.open(self.path("foo").encode("utf-8"), "rb")
143144
self.assertEqual(fio.read(3), buffer)
144145
fio.close()
145146

tiledb/vfs.py

Lines changed: 56 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,14 @@
11
import io
2+
import os
23
from types import TracebackType
34
from typing import List, Optional, Type, Union
45

56
import tiledb.cc as lt
67

78
from .ctx import Config, Ctx, default_ctx
89

10+
_AnyPath = Union[str, bytes, os.PathLike]
11+
912

1013
class VFS(lt.VFS):
1114
"""TileDB VFS class
@@ -29,7 +32,7 @@ def __init__(self, config: Union[Config, dict] = None, ctx: Optional[Ctx] = None
2932
else:
3033
try:
3134
config = dict(config)
32-
except:
35+
except Exception:
3336
raise ValueError("`config` argument must be of type Config or dict")
3437

3538
ccfg = lt.Config(config)
@@ -51,7 +54,7 @@ def config(self) -> Config:
5154
"""
5255
return self._config
5356

54-
def open(self, uri: str, mode: str = "rb"):
57+
def open(self, uri: _AnyPath, mode: str = "rb"):
5558
"""Opens a VFS file resource for reading / writing / appends at URI.
5659
5760
If the file did not exist upon opening, a new file is created.
@@ -144,145 +147,145 @@ def supports(self, scheme: str) -> bool:
144147

145148
return self._ctx.is_supported_fs(scheme_to_fs_type[scheme])
146149

147-
def create_bucket(self, uri: str):
150+
def create_bucket(self, uri: _AnyPath):
148151
"""Creates an object store bucket with the input URI.
149152
150153
:param str uri: Input URI of the bucket
151154
152155
"""
153-
return self._create_bucket(uri)
156+
return self._create_bucket(_to_path_str(uri))
154157

155-
def remove_bucket(self, uri: str):
158+
def remove_bucket(self, uri: _AnyPath):
156159
"""Deletes an object store bucket with the input URI.
157160
158161
:param str uri: Input URI of the bucket
159162
160163
"""
161-
return self._remove_bucket(uri)
164+
return self._remove_bucket(_to_path_str(uri))
162165

163-
def is_bucket(self, uri: str) -> bool:
166+
def is_bucket(self, uri: _AnyPath) -> bool:
164167
"""
165168
:param str uri: Input URI of the bucket
166169
:rtype: bool
167170
:return: True if an object store bucket with the input URI exists, False otherwise
168171
169172
"""
170-
return self._is_bucket(uri)
173+
return self._is_bucket(_to_path_str(uri))
171174

172-
def empty_bucket(self, uri: str):
175+
def empty_bucket(self, uri: _AnyPath):
173176
"""Empty an object store bucket.
174177
175178
:param str uri: Input URI of the bucket
176179
177180
"""
178-
return self._empty_bucket(uri)
181+
return self._empty_bucket(_to_path_str(uri))
179182

180-
def is_empty_bucket(self, uri: str) -> bool:
183+
def is_empty_bucket(self, uri: _AnyPath) -> bool:
181184
"""
182185
:param str uri: Input URI of the bucket
183186
:rtype: bool
184187
:return: True if an object store bucket is empty, False otherwise
185188
186189
"""
187-
return self._is_empty_bucket(uri)
190+
return self._is_empty_bucket(_to_path_str(uri))
188191

189-
def create_dir(self, uri: str):
192+
def create_dir(self, uri: _AnyPath):
190193
"""Check if an object store bucket is empty.
191194
192195
:param str uri: Input URI of the bucket
193196
194197
"""
195-
return self._create_dir(uri)
198+
return self._create_dir(_to_path_str(uri))
196199

197-
def is_dir(self, uri: str) -> bool:
200+
def is_dir(self, uri: _AnyPath) -> bool:
198201
"""
199202
:param str uri: Input URI of the directory
200203
:rtype: bool
201204
:return: True if a directory with the input URI exists, False otherwise
202205
203206
"""
204-
return self._is_dir(uri)
207+
return self._is_dir(_to_path_str(uri))
205208

206-
def remove_dir(self, uri: str):
209+
def remove_dir(self, uri: _AnyPath):
207210
"""Removes a directory (recursively) with the input URI.
208211
209212
:param str uri: Input URI of the directory
210213
211214
"""
212-
return self._remove_dir(uri)
215+
return self._remove_dir(_to_path_str(uri))
213216

214-
def dir_size(self, uri: str) -> int:
217+
def dir_size(self, uri: _AnyPath) -> int:
215218
"""
216219
:param str uri: Input URI of the directory
217220
:rtype: int
218221
:return: The size of a directory with the input URI
219222
220223
"""
221-
return self._dir_size(uri)
224+
return self._dir_size(_to_path_str(uri))
222225

223-
def move_dir(self, old_uri: str, new_uri: str):
226+
def move_dir(self, old_uri: _AnyPath, new_uri: _AnyPath):
224227
"""Renames a TileDB directory from an old URI to a new URI.
225228
226229
:param str old_uri: Input of the old directory URI
227230
:param str new_uri: Input of the new directory URI
228231
229232
"""
230-
return self._move_dir(old_uri, new_uri)
233+
return self._move_dir(_to_path_str(old_uri), _to_path_str(new_uri))
231234

232-
def copy_dir(self, old_uri: str, new_uri: str):
235+
def copy_dir(self, old_uri: _AnyPath, new_uri: _AnyPath):
233236
"""Copies a TileDB directory from an old URI to a new URI.
234237
235238
:param str old_uri: Input of the old directory URI
236239
:param str new_uri: Input of the new directory URI
237240
238241
"""
239-
return self._copy_dir(old_uri, new_uri)
242+
return self._copy_dir(_to_path_str(old_uri), _to_path_str(new_uri))
240243

241-
def is_file(self, uri: str) -> bool:
244+
def is_file(self, uri: _AnyPath) -> bool:
242245
"""
243246
:param str uri: Input URI of the file
244247
:rtype: bool
245248
:return: True if a file with the input URI exists, False otherwise
246249
247250
"""
248-
return self._is_file(uri)
251+
return self._is_file(_to_path_str(uri))
249252

250-
def remove_file(self, uri: str):
253+
def remove_file(self, uri: _AnyPath):
251254
"""Removes a file with the input URI.
252255
253256
:param str uri: Input URI of the file
254257
255258
"""
256-
return self._remove_file(uri)
259+
return self._remove_file(_to_path_str(uri))
257260

258-
def file_size(self, uri: str) -> int:
261+
def file_size(self, uri: _AnyPath) -> int:
259262
"""
260263
:param str uri: Input URI of the file
261264
:rtype: int
262265
:return: The size of a file with the input URI
263266
264267
"""
265-
return self._file_size(uri)
268+
return self._file_size(_to_path_str(uri))
266269

267-
def move_file(self, old_uri: str, new_uri: str):
270+
def move_file(self, old_uri: _AnyPath, new_uri: _AnyPath):
268271
"""Renames a TileDB file from an old URI to a new URI.
269272
270273
:param str old_uri: Input of the old file URI
271274
:param str new_uri: Input of the new file URI
272275
273276
"""
274-
return self._move_file(old_uri, new_uri)
277+
return self._move_file(_to_path_str(old_uri), _to_path_str(new_uri))
275278

276-
def copy_file(self, old_uri: str, new_uri: str):
279+
def copy_file(self, old_uri: _AnyPath, new_uri: _AnyPath):
277280
"""Copies a TileDB file from an old URI to a new URI.
278281
279282
:param str old_uri: Input of the old file URI
280283
:param str new_uri: Input of the new file URI
281284
282285
"""
283-
return self._copy_file(old_uri, new_uri)
286+
return self._copy_file(_to_path_str(old_uri), _to_path_str(new_uri))
284287

285-
def ls(self, uri: str) -> List[str]:
288+
def ls(self, uri: _AnyPath) -> List[str]:
286289
"""Retrieves the children in directory `uri`. This function is
287290
non-recursive, i.e., it focuses in one level below `uri`.
288291
@@ -291,22 +294,23 @@ def ls(self, uri: str) -> List[str]:
291294
:return: The children in directory `uri`
292295
293296
"""
294-
return self._ls(uri)
297+
return self._ls(_to_path_str(uri))
295298

296-
def touch(self, uri: str):
299+
def touch(self, uri: _AnyPath):
297300
"""Touches a file with the input URI, i.e., creates a new empty file.
298301
299302
:param str uri: Input URI of the file
300303
301304
"""
302-
return self._touch(uri)
305+
return self._touch(_to_path_str(uri))
303306

304307

305308
class FileIO(io.RawIOBase):
306309
"""TileDB FileIO class that encapsulates files opened by tiledb.VFS. The file
307310
operations are meant to mimic Python's built-in file I/O methods."""
308311

309-
def __init__(self, vfs: VFS, uri: str, mode: str = "rb"):
312+
def __init__(self, vfs: VFS, uri: _AnyPath, mode: str = "rb"):
313+
uri = _to_path_str(uri)
310314
self._vfs = vfs
311315

312316
str_to_vfs_mode = {
@@ -324,8 +328,8 @@ def __init__(self, vfs: VFS, uri: str, mode: str = "rb"):
324328
if self._mode == "rb":
325329
try:
326330
self._nbytes = vfs.file_size(uri)
327-
except:
328-
raise lt.TileDBError(f"URI {uri} is not a valid file")
331+
except Exception as e:
332+
raise lt.TileDBError(f"URI {uri!r} is not a valid file") from e
329333

330334
self._fh = lt.FileHandle(
331335
self._vfs._ctx, self._vfs, uri, str_to_vfs_mode[self._mode]
@@ -504,3 +508,14 @@ def readinto(self, buff: bytes) -> int:
504508

505509
def readinto1(self, b):
506510
return self.readinto(b)
511+
512+
513+
def _to_path_str(pth: _AnyPath) -> Union[str, bytes]:
514+
if isinstance(pth, (str, bytes)):
515+
return pth
516+
try:
517+
return pth.__fspath__()
518+
except AttributeError as ae:
519+
raise TypeError(
520+
"VFS paths must be strings, bytes, or os.PathLike objects"
521+
) from ae

0 commit comments

Comments
 (0)