diff --git a/docs/source/features.rst b/docs/source/features.rst index 949e47107..badc1fb24 100644 --- a/docs/source/features.rst +++ b/docs/source/features.rst @@ -241,7 +241,9 @@ reads the same zip-file, but extracts the CSV files and stores them locally in t **For developers**: this "chaining" methods works by formatting the arguments passed to ``open_*`` into ``target_protocol`` (a simple string) and ``target_options`` (a dict) and also optionally ``fo`` (target path, if a specific file is required). In order for an implementation to chain -successfully like this, it must look for exactly those named arguments. +successfully like this, it must look for exactly those named arguments. Implementations that +require access to the target path of their nested targets should inherit from ``ChainedFileSystem``, +which will trigger pass-through of the nested path automatically. Caching Files Locally --------------------- diff --git a/fsspec/core.py b/fsspec/core.py index d8e75572b..a7db808ae 100644 --- a/fsspec/core.py +++ b/fsspec/core.py @@ -330,7 +330,7 @@ def open_files( def _un_chain(path, kwargs): # Avoid a circular import - from fsspec.implementations.cached import CachingFileSystem + from fsspec.implementations.chained import ChainedFileSystem if "::" in path: x = re.compile(".*[^a-z]+.*") # test for non protocol-like single word @@ -358,7 +358,7 @@ def _un_chain(path, kwargs): **kws, ) bit = cls._strip_protocol(bit) - if "target_protocol" not in kw and issubclass(cls, CachingFileSystem): + if "target_protocol" not in kw and issubclass(cls, ChainedFileSystem): bit = previous_bit out.append((bit, protocol, kw)) previous_bit = bit diff --git a/fsspec/implementations/cached.py b/fsspec/implementations/cached.py index 74e6a59e4..8e4d0a0ba 100644 --- a/fsspec/implementations/cached.py +++ b/fsspec/implementations/cached.py @@ -9,13 +9,14 @@ from shutil import rmtree from typing import TYPE_CHECKING, Any, Callable, ClassVar -from fsspec import AbstractFileSystem, 
filesystem +from fsspec import filesystem from fsspec.callbacks import DEFAULT_CALLBACK from fsspec.compression import compr from fsspec.core import BaseCache, MMapCache from fsspec.exceptions import BlocksizeMismatchError from fsspec.implementations.cache_mapper import create_cache_mapper from fsspec.implementations.cache_metadata import CacheMetadata +from fsspec.implementations.chained import ChainedFileSystem from fsspec.implementations.local import LocalFileSystem from fsspec.spec import AbstractBufferedFile from fsspec.transaction import Transaction @@ -39,7 +40,7 @@ def complete(self, commit=True): self.fs = None # break cycle -class CachingFileSystem(AbstractFileSystem): +class CachingFileSystem(ChainedFileSystem): """Locally caching filesystem, layer over any other FS This class implements chunk-wise local storage of remote files, for quick diff --git a/fsspec/implementations/chained.py b/fsspec/implementations/chained.py new file mode 100644 index 000000000..bfce64334 --- /dev/null +++ b/fsspec/implementations/chained.py @@ -0,0 +1,23 @@ +from typing import ClassVar + +from fsspec import AbstractFileSystem + +__all__ = ("ChainedFileSystem",) + + +class ChainedFileSystem(AbstractFileSystem): + """Chained filesystem base class. + + A chained filesystem is designed to be layered over another FS. + This is useful to implement things like caching. + + This base class does very little on its own, but is used as a marker + that the class is designed for chaining. + + Right now this is only used in `url_to_fs` to provide the path argument + (`fo`) to the chained filesystem from the underlying filesystem. + + Additional functionality may be added in the future. 
+    """
+
+    protocol: ClassVar[str] = "chained"
diff --git a/fsspec/tests/test_chained.py b/fsspec/tests/test_chained.py
new file mode 100644
index 000000000..0c7579481
--- /dev/null
+++ b/fsspec/tests/test_chained.py
@@ -0,0 +1,46 @@
+import pytest
+
+from fsspec import AbstractFileSystem, filesystem, register_implementation, url_to_fs
+from fsspec.implementations.chained import ChainedFileSystem
+
+
+class MyChainedFS(ChainedFileSystem):
+    """Minimal chained FS wrapping the filesystem named by ``target_protocol``."""
+
+    protocol = "mychain"
+
+    def __init__(self, target_protocol="", target_options=None, **kwargs):
+        super().__init__(**kwargs)
+        # target_options defaults to None, which cannot be **-unpacked
+        self.fs = filesystem(target_protocol, **(target_options or {}))
+
+
+class MyNonChainedFS(AbstractFileSystem):
+    """Plain filesystem used as the nested target of the chain."""
+
+    protocol = "mynonchain"
+
+
+@pytest.fixture(scope="module")
+def register_fs():
+    """Register both test protocols so ``url_to_fs`` can resolve them."""
+    register_implementation(MyChainedFS.protocol, MyChainedFS)
+    register_implementation(MyNonChainedFS.protocol, MyNonChainedFS)
+    yield
+
+
+def test_token_passthrough_to_chained(register_fs):
+    """The nested target's path is returned when the outer FS is chained."""
+    # First, run a sanity check
+    fs, rest = url_to_fs("mynonchain://path/to/file")
+    assert isinstance(fs, MyNonChainedFS)
+    assert fs.protocol == "mynonchain"
+    assert rest == "path/to/file"
+
+    # Now test that the chained FS works
+    fs, rest = url_to_fs("mychain::mynonchain://path/to/file")
+    assert isinstance(fs, MyChainedFS)
+    assert fs.protocol == "mychain"
+    assert rest == "path/to/file"
+    assert isinstance(fs.fs, MyNonChainedFS)
+    assert fs.fs.protocol == "mynonchain"