From 280ef2fcb20e313682d115ed81642b642daa8b5f Mon Sep 17 00:00:00 2001 From: siemdejong <28396796+siemdejong@users.noreply.github.com> Date: Thu, 24 Apr 2025 15:56:17 +0200 Subject: [PATCH 1/5] fix: keep same permissions when using transaction Uses https://stackoverflow.com/a/44130549 to check the current umask, temporarily (one line) change it to 0o666. Apply the permission modification. --- fsspec/implementations/local.py | 7 +++++++ fsspec/implementations/tests/test_local.py | 13 +++++++++++++ 2 files changed, 20 insertions(+) diff --git a/fsspec/implementations/local.py b/fsspec/implementations/local.py index ee2995d63..3a5b24fbe 100644 --- a/fsspec/implementations/local.py +++ b/fsspec/implementations/local.py @@ -417,6 +417,13 @@ def commit(self): if self.autocommit: raise RuntimeError("Can only commit if not already set to autocommit") shutil.move(self.temp, self.path) + try: + # Get the current umask and set it temporarily to 0o666. + umask = os.umask(0o666) + os.umask(umask) + os.chmod(self.path, 0o666 & ~umask) + except RuntimeError: + pass def discard(self): if self.autocommit: diff --git a/fsspec/implementations/tests/test_local.py b/fsspec/implementations/tests/test_local.py index 2820c14f5..5ad782ed0 100644 --- a/fsspec/implementations/tests/test_local.py +++ b/fsspec/implementations/tests/test_local.py @@ -508,6 +508,19 @@ def test_commit_discard(tmpdir): assert not fs.exists(tmpdir + "/bfile") +def test_same_permissions_with_and_without_transaction(tmpdir): + tmpdir = str(tmpdir) + + with fsspec.open(tmpdir + "/afile", 'wb') as f: + f.write(b'data') + + fs, urlpath = fsspec.core.url_to_fs(tmpdir + "/bfile") + with fs.transaction, fs.open(urlpath, 'wb') as f: + f.write(b'data') + + assert fs.info(tmpdir + "/afile")["mode"] == fs.info(tmpdir + "/bfile")["mode"] + + def test_make_path_posix(): cwd = os.getcwd() if WIN: From 66258a1816bfc302b1fb51f0b64cdb360d84d6d1 Mon Sep 17 00:00:00 2001 From: siemdejong <28396796+siemdejong@users.noreply.github.com> Date: Thu, 24 Apr 2025 16:52:57 +0200 Subject: [PATCH 2/5] fix: cache umask Cache umask and test for a cache hit. Apply precommit. --- fsspec/implementations/local.py | 15 +++++--- fsspec/implementations/tests/test_local.py | 42 ++++++++++++++++++---- 2 files changed, 47 insertions(+), 10 deletions(-) diff --git a/fsspec/implementations/local.py b/fsspec/implementations/local.py index 3a5b24fbe..38de41005 100644 --- a/fsspec/implementations/local.py +++ b/fsspec/implementations/local.py @@ -6,6 +6,7 @@ import shutil import stat import tempfile +from functools import lru_cache from fsspec import AbstractFileSystem from fsspec.compression import compr @@ -354,6 +355,14 @@ def trailing_sep(path): return path.endswith(os.sep) or (os.altsep is not None and path.endswith(os.altsep)) +@lru_cache(maxsize=1) +def get_umask(mask: int = 0o666) -> int: + """Get the current umask and set it temporarily to 0o666.""" + value = os.umask(mask) + os.umask(value) + return value + + class LocalFileOpener(io.IOBase): def __init__( self, path, mode, autocommit=True, fs=None, compression=None, **kwargs @@ -418,10 +427,8 @@ def commit(self): raise RuntimeError("Can only commit if not already set to autocommit") shutil.move(self.temp, self.path) try: - # Get the current umask and set it temporarily to 0o666. - umask = os.umask(0o666) - os.umask(umask) - os.chmod(self.path, 0o666 & ~umask) + mask = 0o666 + os.chmod(self.path, mask & ~get_umask(mask)) except RuntimeError: pass diff --git a/fsspec/implementations/tests/test_local.py b/fsspec/implementations/tests/test_local.py index 5ad782ed0..506904b9a 100644 --- a/fsspec/implementations/tests/test_local.py +++ b/fsspec/implementations/tests/test_local.py @@ -15,7 +15,7 @@ import fsspec from fsspec import compression from fsspec.core import OpenFile, get_fs_token_paths, open_files -from fsspec.implementations.local import LocalFileSystem, make_path_posix +from fsspec.implementations.local import LocalFileSystem, get_umask, make_path_posix from fsspec.tests.test_utils import WIN files = { @@ -511,16 +511,46 @@ def test_commit_discard(tmpdir): def test_same_permissions_with_and_without_transaction(tmpdir): tmpdir = str(tmpdir) - with fsspec.open(tmpdir + "/afile", 'wb') as f: - f.write(b'data') + with fsspec.open(tmpdir + "/afile", "wb") as f: + f.write(b"data") fs, urlpath = fsspec.core.url_to_fs(tmpdir + "/bfile") - with fs.transaction, fs.open(urlpath, 'wb') as f: - f.write(b'data') - + with fs.transaction, fs.open(urlpath, "wb") as f: + f.write(b"data") + assert fs.info(tmpdir + "/afile")["mode"] == fs.info(tmpdir + "/bfile")["mode"] +def test_get_umask_uses_cache(): + get_umask.cache_clear() # Clear the cache before the test for testing purposes. + get_umask() # Retrieve the current umask value. + get_umask() # Retrieve the umask again; this should result in a cache hit. + assert get_umask.cache_info().hits == 1 + + +def test_multiple_file_transaction_use_umask_cache(tmpdir): + get_umask.cache_clear() # Clear the cache before the test for testing purposes. + fs = LocalFileSystem() + with fs.transaction: + with fs.open(tmpdir + "/afile", "wb") as f: + f.write(b"data") + with fs.open(tmpdir + "/bfile", "wb") as f: + f.write(b"data") + assert get_umask.cache_info().hits == 1 + + +def test_multiple_transactions_use_umask_cache(tmpdir): + get_umask.cache_clear() # Clear the cache before the test for testing purposes. + fs = LocalFileSystem() + with fs.transaction: + with fs.open(tmpdir + "/afile", "wb") as f: + f.write(b"data") + with fs.transaction: + with fs.open(tmpdir + "/bfile", "wb") as f: + f.write(b"data") + assert get_umask.cache_info().hits == 1 + + def test_make_path_posix(): cwd = os.getcwd() if WIN: From f9b8b71c49fb48d5be4883e4d2267f3c3d94b06b Mon Sep 17 00:00:00 2001 From: siemdejong <28396796+siemdejong@users.noreply.github.com> Date: Thu, 24 Apr 2025 17:00:56 +0200 Subject: [PATCH 3/5] tests: add cache over multiple filesystems test --- fsspec/implementations/tests/test_local.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/fsspec/implementations/tests/test_local.py b/fsspec/implementations/tests/test_local.py index 506904b9a..2c48adad9 100644 --- a/fsspec/implementations/tests/test_local.py +++ b/fsspec/implementations/tests/test_local.py @@ -550,6 +550,16 @@ def test_multiple_transactions_use_umask_cache(tmpdir): f.write(b"data") assert get_umask.cache_info().hits == 1 +def test_multiple_filesystems_use_umask_cache(tmpdir): + get_umask.cache_clear() # Clear the cache before the test for testing purposes. + fs1 = LocalFileSystem() + fs2 = LocalFileSystem() + with fs1.transaction, fs1.open(tmpdir + "/afile", "wb") as f: + f.write(b"data") + with fs2.transaction, fs2.open(tmpdir + "/bfile", "wb") as f: + f.write(b"data") + assert get_umask.cache_info().hits == 1 + def test_make_path_posix(): cwd = os.getcwd() From 5918ca320ea8d865da856f3234935c3b0c1d03ea Mon Sep 17 00:00:00 2001 From: siemdejong <28396796+siemdejong@users.noreply.github.com> Date: Thu, 24 Apr 2025 17:14:53 +0200 Subject: [PATCH 4/5] style: run precommit --- fsspec/implementations/tests/test_local.py | 1 + 1 file changed, 1 insertion(+) diff --git a/fsspec/implementations/tests/test_local.py b/fsspec/implementations/tests/test_local.py index 2c48adad9..2e4c07c9d 100644 --- a/fsspec/implementations/tests/test_local.py +++ b/fsspec/implementations/tests/test_local.py @@ -550,6 +550,7 @@ def test_multiple_transactions_use_umask_cache(tmpdir): f.write(b"data") assert get_umask.cache_info().hits == 1 + def test_multiple_filesystems_use_umask_cache(tmpdir): get_umask.cache_clear() # Clear the cache before the test for testing purposes. fs1 = LocalFileSystem() From be84330bff3a29a18966f32f4e9929e2adf2ddc3 Mon Sep 17 00:00:00 2001 From: siemdejong <28396796+siemdejong@users.noreply.github.com> Date: Thu, 24 Apr 2025 17:26:40 +0200 Subject: [PATCH 5/5] docs: fix docstring --- fsspec/implementations/local.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fsspec/implementations/local.py b/fsspec/implementations/local.py index 38de41005..70dfd901e 100644 --- a/fsspec/implementations/local.py +++ b/fsspec/implementations/local.py @@ -357,7 +357,12 @@ def trailing_sep(path): @lru_cache(maxsize=1) def get_umask(mask: int = 0o666) -> int: - """Get the current umask and set it temporarily to 0o666.""" + """Get the current umask. + + Follows https://stackoverflow.com/a/44130549 to get the umask. + Temporarily sets the umask to the given value, and then resets it to the + original value. + """ value = os.umask(mask) os.umask(value) return value