From e2e8b9bdfdb443487443daa149a0423b6b9709b1 Mon Sep 17 00:00:00 2001 From: Milan Hauth Date: Thu, 7 Aug 2025 20:45:26 +0200 Subject: [PATCH 1/5] memory: _intrans = False --- fsspec/implementations/memory.py | 1 + 1 file changed, 1 insertion(+) diff --git a/fsspec/implementations/memory.py b/fsspec/implementations/memory.py index 838876d1f..fddca7d02 100644 --- a/fsspec/implementations/memory.py +++ b/fsspec/implementations/memory.py @@ -25,6 +25,7 @@ class MemoryFileSystem(AbstractFileSystem): pseudo_dirs = [""] # global, do not overwrite! protocol = "memory" root_marker = "/" + _intrans = False @classmethod def _strip_protocol(cls, path): From 66072aceb2371dab2a70dfdf318fee49325fa954 Mon Sep 17 00:00:00 2001 From: Milan Hauth Date: Thu, 7 Aug 2025 20:46:27 +0200 Subject: [PATCH 2/5] memory: add __init__ --- fsspec/implementations/memory.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/fsspec/implementations/memory.py b/fsspec/implementations/memory.py index fddca7d02..d3cc80ac6 100644 --- a/fsspec/implementations/memory.py +++ b/fsspec/implementations/memory.py @@ -27,6 +27,10 @@ class MemoryFileSystem(AbstractFileSystem): root_marker = "/" _intrans = False + def __init__(self, *args, **kwargs): + self.logger = logger + super().__init__(*args, **kwargs) + @classmethod def _strip_protocol(cls, path): if isinstance(path, PurePath): @@ -148,7 +152,7 @@ def rmdir(self, path): raise FileNotFoundError(path) def info(self, path, **kwargs): - logger.debug("info: %s", path) + self.logger.debug("info: %s", path) path = self._strip_protocol(path) if path in self.pseudo_dirs or any( p.startswith(path + "/") for p in list(self.store) + self.pseudo_dirs @@ -203,7 +207,7 @@ def _open( elif mode in {"wb", "xb"}: if mode == "xb" and self.exists(path): raise FileExistsError - m = MemoryFile(self, path, kwargs.get("data")) + m = MemoryFile(self, path, kwargs.get("data"), self.logger) if not self._intrans: m.commit() return m @@ -216,7 +220,7 @@ def cp_file(self, path1, path2, **kwargs): path2 = self._strip_protocol(path2) if self.isfile(path1): self.store[path2] = MemoryFile( - self, path2, self.store[path1].getvalue() + self, path2, self.store[path1].getvalue(), self.logger ) # implicit copy elif self.isdir(path1): if path2 not in self.pseudo_dirs: @@ -225,7 +229,7 @@ def cp_file(self, path1, path2, **kwargs): raise FileNotFoundError(path1) def cat_file(self, path, start=None, end=None, **kwargs): - logger.debug("cat: %s", path) + self.logger.debug("cat: %s", path) path = self._strip_protocol(path) try: return bytes(self.store[path].getbuffer()[start:end]) @@ -284,8 +288,9 @@ class MemoryFile(BytesIO): No need to provide fs, path if auto-committing (default) """ - def __init__(self, fs=None, path=None, data=None): - logger.debug("open file %s", path) + def __init__(self, fs=None, path=None, data=None, logger=logger): + self.logger = logger + self.logger.debug("open file %s", path) self.fs = fs self.path = path self.created = datetime.now(tz=timezone.utc) From 5953c8626c23d9be69eca2e5e7a67c4d7fe149cd Mon Sep 17 00:00:00 2001 From: Milan Hauth Date: Thu, 7 Aug 2025 20:46:39 +0200 Subject: [PATCH 3/5] localmemory: init --- fsspec/implementations/localmemory.py | 28 +++++++++++++++++++ .../implementations/tests/test_localmemory.py | 24 ++++++++++++++++ fsspec/registry.py | 1 + 3 files changed, 53 insertions(+) create mode 100644 fsspec/implementations/localmemory.py create mode 100644 fsspec/implementations/tests/test_localmemory.py diff --git a/fsspec/implementations/localmemory.py b/fsspec/implementations/localmemory.py new file mode 100644 index 000000000..ba074dffd --- /dev/null +++ b/fsspec/implementations/localmemory.py @@ -0,0 +1,28 @@ +from __future__ import annotations + +import logging + +from fsspec.implementations.memory import MemoryFileSystem + +logger = logging.getLogger("fsspec.localmemoryfs") + + +class LocalMemoryFileSystem(MemoryFileSystem): + """A filesystem based on a dict of BytesIO objects + + This is a local filesystem so different instances of this class + point to different memory filesystems. + """ + + store = None + pseudo_dirs = None + protocol = "localmemory" + root_marker = "/" + _intrans = False + cachable = False # same as: skip_instance_cache = True + + def __init__(self, *args, **kwargs): + self.logger = logger # global + self.store: dict[str, Any] = {} # local + self.pseudo_dirs = [""] # local + super().__init__(*args, **kwargs) diff --git a/fsspec/implementations/tests/test_localmemory.py b/fsspec/implementations/tests/test_localmemory.py new file mode 100644 index 000000000..a16439821 --- /dev/null +++ b/fsspec/implementations/tests/test_localmemory.py @@ -0,0 +1,24 @@ +import pytest + +import fsspec +from fsspec.implementations.localmemory import LocalMemoryFileSystem + + +def test_protocol(): + # this should not throw: ValueError: Protocol not known: localmemory + fsspec.filesystem("localmemory") + + +def test_init(): + fs1 = LocalMemoryFileSystem() + fs2 = LocalMemoryFileSystem() + + # check that fs1 and fs2 are different instances of LocalMemoryFileSystem + assert id(fs1) != id(fs2) + assert id(fs1.store) != id(fs2.store) + assert id(fs1.pseudo_dirs) != id(fs2.pseudo_dirs) + + fs1.touch("/fs1.txt") + fs2.touch("/fs2.txt") + assert fs1.ls("/", detail=False) == ["/fs1.txt"] + assert fs2.ls("/", detail=False) == ["/fs2.txt"] diff --git a/fsspec/registry.py b/fsspec/registry.py index 96ffad7f4..8f4ec9167 100644 --- a/fsspec/registry.py +++ b/fsspec/registry.py @@ -171,6 +171,7 @@ def register_implementation(name, cls, clobber=False, errtxt=None): "err": "LibArchive requires to be installed", }, "local": {"class": "fsspec.implementations.local.LocalFileSystem"}, + "localmemory": {"class": "fsspec.implementations.localmemory.LocalMemoryFileSystem"}, "memory": {"class": "fsspec.implementations.memory.MemoryFileSystem"}, "oci": { "class": "ocifs.OCIFileSystem", From deb502c1da0cfa0c89aa5dd2971dbdaec1724ffa Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Thu, 7 Aug 2025 15:30:28 -0400 Subject: [PATCH 4/5] lint --- fsspec/implementations/localmemory.py | 3 +-- fsspec/implementations/tests/test_localmemory.py | 2 -- fsspec/registry.py | 4 +++- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/fsspec/implementations/localmemory.py b/fsspec/implementations/localmemory.py index ba074dffd..6f8af2e54 100644 --- a/fsspec/implementations/localmemory.py +++ b/fsspec/implementations/localmemory.py @@ -14,7 +14,6 @@ class LocalMemoryFileSystem(MemoryFileSystem): point to different memory filesystems. """ - store = None pseudo_dirs = None protocol = "localmemory" root_marker = "/" @@ -23,6 +22,6 @@ class LocalMemoryFileSystem(MemoryFileSystem): def __init__(self, *args, **kwargs): self.logger = logger # global - self.store: dict[str, Any] = {} # local + self.store = {} # local self.pseudo_dirs = [""] # local super().__init__(*args, **kwargs) diff --git a/fsspec/implementations/tests/test_localmemory.py b/fsspec/implementations/tests/test_localmemory.py index a16439821..71d7f10ed 100644 --- a/fsspec/implementations/tests/test_localmemory.py +++ b/fsspec/implementations/tests/test_localmemory.py @@ -1,5 +1,3 @@ -import pytest - import fsspec from fsspec.implementations.localmemory import LocalMemoryFileSystem diff --git a/fsspec/registry.py b/fsspec/registry.py index 8f4ec9167..20dc1ab4c 100644 --- a/fsspec/registry.py +++ b/fsspec/registry.py @@ -171,7 +171,9 @@ def register_implementation(name, cls, clobber=False, errtxt=None): "err": "LibArchive requires to be installed", }, "local": {"class": "fsspec.implementations.local.LocalFileSystem"}, - "localmemory": {"class": "fsspec.implementations.localmemory.LocalMemoryFileSystem"}, + "localmemory": { + "class": "fsspec.implementations.localmemory.LocalMemoryFileSystem" + }, "memory": {"class": "fsspec.implementations.memory.MemoryFileSystem"}, "oci": { "class": "ocifs.OCIFileSystem", From 820eca997d34a1ca80f3a62290e38828257127a8 Mon Sep 17 00:00:00 2001 From: Milan Hauth Date: Mon, 11 Aug 2025 21:03:53 +0200 Subject: [PATCH 5/5] localmemory: merge with memory --- fsspec/implementations/localmemory.py | 27 --------- fsspec/implementations/memory.py | 57 +++++++++++++++++- .../implementations/tests/test_localmemory.py | 22 ------- fsspec/implementations/tests/test_memory.py | 60 +++++++++++++++++++ fsspec/registry.py | 3 - fsspec/spec.py | 14 ++++- 6 files changed, 127 insertions(+), 56 deletions(-) delete mode 100644 fsspec/implementations/localmemory.py delete mode 100644 fsspec/implementations/tests/test_localmemory.py diff --git a/fsspec/implementations/localmemory.py b/fsspec/implementations/localmemory.py deleted file mode 100644 index 6f8af2e54..000000000 --- a/fsspec/implementations/localmemory.py +++ /dev/null @@ -1,27 +0,0 @@ -from __future__ import annotations - -import logging - -from fsspec.implementations.memory import MemoryFileSystem - -logger = logging.getLogger("fsspec.localmemoryfs") - - -class LocalMemoryFileSystem(MemoryFileSystem): - """A filesystem based on a dict of BytesIO objects - - This is a local filesystem so different instances of this class - point to different memory filesystems. - """ - - pseudo_dirs = None - protocol = "localmemory" - root_marker = "/" - _intrans = False - cachable = False # same as: skip_instance_cache = True - - def __init__(self, *args, **kwargs): - self.logger = logger # global - self.store = {} # local - self.pseudo_dirs = [""] # local - super().__init__(*args, **kwargs) diff --git a/fsspec/implementations/memory.py b/fsspec/implementations/memory.py index d3cc80ac6..5a2a44628 100644 --- a/fsspec/implementations/memory.py +++ b/fsspec/implementations/memory.py @@ -21,15 +21,66 @@ class MemoryFileSystem(AbstractFileSystem): in memory filesystem. """ - store: ClassVar[dict[str, Any]] = {} # global, do not overwrite! - pseudo_dirs = [""] # global, do not overwrite! + global_store: ClassVar[dict[str, Any]] = {} # global, do not overwrite! + global_pseudo_dirs = [""] # global, do not overwrite! protocol = "memory" root_marker = "/" _intrans = False + # never called + # def __call__(cls, *args, **kwargs): + # print("MemoryFileSystem call kwargs", kwargs) + # skip = kwargs.pop("skip_instance_cache", False) + # instance = super().__call__(*args, **kwargs) + # if skip: + # instance.store = {} + # instance.pseudo_dirs = [""] + # else: + # instance.store = instance.global_store + # instance.pseudo_dirs = instance.global_pseudo_dirs + # return instance + + # FIXME AttributeError: 'MemoryFileSystem' object has no attribute '_skip_instance_cache' + # def __new__(cls, *args, **kwargs): + # # print("new kwargs", kwargs) + # # skip = kwargs.pop("skip_instance_cache", False) + # instance = super().__new__(cls, *args, **kwargs) + # # FIXME AttributeError: 'MemoryFileSystem' object has no attribute '_skip_instance_cache' + # skip = instance._skip_instance_cache + # print("new skip", skip) + # if skip: + # instance.store = {} + # instance.pseudo_dirs = [""] + # else: + # instance.store = instance.global_store + # instance.pseudo_dirs = instance.global_pseudo_dirs + # return instance + def __init__(self, *args, **kwargs): - self.logger = logger + # print("MemoryFileSystem init kwargs", kwargs) super().__init__(*args, **kwargs) + self.logger = logger + # print("MemoryFileSystem init kwargs", kwargs) + # skip = kwargs.pop("skip_instance_cache", False) + # if skip: + # self.store = {} + # self.pseudo_dirs = [""] + # local_memory = kwargs.pop("local_memory", False) + # if local_memory: + # skip = self._skip_instance_cache + skip = kwargs.get("skip_instance_cache", False) + # print("MemoryFileSystem init skip", skip) + # FIXME skip is None + # assert skip in (True, False) + if skip: + # local + self.store = {} + self.pseudo_dirs = [""] + else: + # global + self.store = self.global_store + self.pseudo_dirs = self.global_pseudo_dirs + # super().__init__(*args, **kwargs) @classmethod def _strip_protocol(cls, path): diff --git a/fsspec/implementations/tests/test_localmemory.py b/fsspec/implementations/tests/test_localmemory.py deleted file mode 100644 index 71d7f10ed..000000000 --- a/fsspec/implementations/tests/test_localmemory.py +++ /dev/null @@ -1,22 +0,0 @@ -import fsspec -from fsspec.implementations.localmemory import LocalMemoryFileSystem - - -def test_protocol(): - # this should not throw: ValueError: Protocol not known: localmemory - fsspec.filesystem("localmemory") - - -def test_init(): - fs1 = LocalMemoryFileSystem() - fs2 = LocalMemoryFileSystem() - - # check that fs1 and fs2 are different instances of LocalMemoryFileSystem - assert id(fs1) != id(fs2) - assert id(fs1.store) != id(fs2.store) - assert id(fs1.pseudo_dirs) != id(fs2.pseudo_dirs) - - fs1.touch("/fs1.txt") - fs2.touch("/fs2.txt") - assert fs1.ls("/", detail=False) == ["/fs1.txt"] - assert fs2.ls("/", detail=False) == ["/fs2.txt"] diff --git a/fsspec/implementations/tests/test_memory.py b/fsspec/implementations/tests/test_memory.py index 600022a03..9d74266ca 100644 --- a/fsspec/implementations/tests/test_memory.py +++ b/fsspec/implementations/tests/test_memory.py @@ -4,6 +4,66 @@ import pytest from fsspec.implementations.local import LocalFileSystem, make_path_posix +from fsspec.implementations.memory import MemoryFileSystem + + +def test_identical_instances(): + fs1 = MemoryFileSystem() + fs2 = MemoryFileSystem() + + assert id(fs1) == id(fs2) + assert id(fs1.store) == id(fs2.store) + + fs1.touch("/fs1.txt") + fs2.touch("/fs2.txt") + assert fs1.ls("/", detail=False) == ["/fs1.txt", "/fs2.txt"] + assert fs2.ls("/", detail=False) == ["/fs1.txt", "/fs2.txt"] + + +def _clear(m): + m.store.clear() + m.pseudo_dirs.clear() + m.pseudo_dirs.append("") + + +def test_separate_instances_1_1(): + # fs1 = MemoryFileSystem(local_memory=True) + # fs2 = MemoryFileSystem(local_memory=True) + # FIXME only one param + # fs1 = MemoryFileSystem(skip_instance_cache=True, local_memory=True) + # fs2 = MemoryFileSystem(skip_instance_cache=True, local_memory=True) + fs1 = MemoryFileSystem(skip_instance_cache=True) + fs2 = MemoryFileSystem(skip_instance_cache=True) + assert id(fs1) != id(fs2) + assert id(fs1.store) != id(fs2.store) + fs1.touch("/fs1.txt") + fs2.touch("/fs2.txt") + assert fs1.ls("/", detail=False) == ["/fs1.txt"] + assert fs2.ls("/", detail=False) == ["/fs2.txt"] + + +def test_separate_instances_1_0(): + fs1 = MemoryFileSystem(skip_instance_cache=True) # local + fs2 = MemoryFileSystem() # global + _clear(fs2) + assert id(fs1) != id(fs2) + assert id(fs1.store) != id(fs2.store) + fs1.touch("/fs1.txt") + fs2.touch("/fs2.txt") + assert fs1.ls("/", detail=False) == ["/fs1.txt"] # local + assert fs2.ls("/", detail=False) == ["/fs2.txt"] # global + + +def test_separate_instances_0_1(): + fs1 = MemoryFileSystem() # global + fs2 = MemoryFileSystem(skip_instance_cache=True) # local + _clear(fs1) + assert id(fs1) != id(fs2) + assert id(fs1.store) != id(fs2.store) + fs1.touch("/fs1.txt") + fs2.touch("/fs2.txt") + assert fs2.ls("/", detail=False) == ["/fs2.txt"] # local + assert fs1.ls("/", detail=False) == ["/fs1.txt"] # global def test_1(m): diff --git a/fsspec/registry.py b/fsspec/registry.py index 20dc1ab4c..96ffad7f4 100644 --- a/fsspec/registry.py +++ b/fsspec/registry.py @@ -171,9 +171,6 @@ def register_implementation(name, cls, clobber=False, errtxt=None): "err": "LibArchive requires to be installed", }, "local": {"class": "fsspec.implementations.local.LocalFileSystem"}, - "localmemory": { - "class": "fsspec.implementations.localmemory.LocalMemoryFileSystem" - }, "memory": {"class": "fsspec.implementations.memory.MemoryFileSystem"}, "oci": { "class": "ocifs.OCIFileSystem", diff --git a/fsspec/spec.py b/fsspec/spec.py index 5f6f9a104..3fb931472 100644 --- a/fsspec/spec.py +++ b/fsspec/spec.py @@ -63,6 +63,7 @@ def __init__(cls, *args, **kwargs): cls._pid = os.getpid() def __call__(cls, *args, **kwargs): + # print("_Cached call kwargs", kwargs) kwargs = apply_config(cls, kwargs) extra_tokens = tuple( getattr(cls, attr, None) for attr in cls._extra_tokenize_attributes @@ -70,7 +71,9 @@ def __call__(cls, *args, **kwargs): token = tokenize( cls, cls._pid, threading.get_ident(), *args, *extra_tokens, **kwargs ) - skip = kwargs.pop("skip_instance_cache", False) + # skip = kwargs.pop("skip_instance_cache", False) + skip = kwargs.get("skip_instance_cache", False) + assert skip in (True, False) if os.getpid() != cls._pid: cls._cache.clear() cls._pid = os.getpid() @@ -81,8 +84,12 @@ def __call__(cls, *args, **kwargs): obj = super().__call__(*args, **kwargs) # Setting _fs_token here causes some static linters to complain. obj._fs_token_ = token + # no. too late. must be passed to init + # obj._skip_instance_cache_ = skip + # print("_Cached call skip", skip) obj.storage_args = args obj.storage_options = kwargs + # setattr(obj, "_skip_instance_cache", skip) if obj.async_impl and obj.mirror_sync_methods: from .asyn import mirror_sync_methods @@ -160,6 +167,7 @@ def __init__(self, *args, **storage_options): warnings.warn("add_aliases has been removed.", FutureWarning) # This is set in _Cached self._fs_token_ = None + # self._skip_instance_cache_ = None @property def fsid(self): @@ -172,6 +180,10 @@ def fsid(self): def _fs_token(self): return self._fs_token_ + # @property + # def _skip_instance_cache(self): + # return self._skip_instance_cache_ + def __dask_tokenize__(self): return self._fs_token