Skip to content

Commit da155c6

Browse files
avoid instantiating filesystem for path operations (#176)
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent 42999eb commit da155c6

File tree

2 files changed

+8
-17
lines changed

2 files changed

+8
-17
lines changed

src/datachain/lib/file.py

Lines changed: 6 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,6 @@
1212
from urllib.request import url2pathname
1313

1414
from fsspec.callbacks import DEFAULT_CALLBACK, Callback
15-
from fsspec.implementations.local import LocalFileSystem
1615
from PIL import Image
1716
from pydantic import Field, field_validator
1817

@@ -283,9 +282,8 @@ def get_uri(self):
283282
def get_path(self) -> str:
284283
"""Returns file path."""
285284
path = unquote(self.get_uri())
286-
fs = self.get_fs()
287-
if isinstance(fs, LocalFileSystem):
288-
# Drop file:// protocol
285+
source = urlparse(self.source)
286+
if source.scheme == "file":
289287
path = urlparse(path).path
290288
path = url2pathname(path)
291289
return path
@@ -300,13 +298,10 @@ def get_destination_path(self, output: str, placement: ExportPlacement) -> str:
300298
elif placement == "etag":
301299
path = f"{self.etag}{self.get_file_suffix()}"
302300
elif placement == "fullpath":
303-
fs = self.get_fs()
304-
if isinstance(fs, LocalFileSystem):
305-
path = unquote(self.get_full_name())
306-
else:
307-
path = (
308-
Path(urlparse(self.source).netloc) / unquote(self.get_full_name())
309-
).as_posix()
301+
path = unquote(self.get_full_name())
302+
source = urlparse(self.source)
303+
if source.scheme and source.scheme != "file":
304+
path = posixpath.join(source.netloc, path)
310305
elif placement == "checksum":
311306
raise NotImplementedError("Checksum placement not implemented yet")
312307
else:

tests/func/test_datachain.py

Lines changed: 2 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -17,12 +17,8 @@
1717

1818

1919
@pytest.mark.parametrize("anon", [True, False])
20-
def test_catalog_anon(catalog, anon):
21-
chain = (
22-
DataChain.from_storage("gs://dvcx-datalakes/dogs-and-cats/", anon=anon)
23-
.limit(5)
24-
.save("test_catalog_anon")
25-
)
20+
def test_catalog_anon(tmp_dir, catalog, anon):
21+
chain = DataChain.from_storage(tmp_dir.as_uri(), anon=anon)
2622
assert chain.catalog.client_config.get("anon", False) is anon
2723

2824

0 commit comments

Comments
 (0)