From 2b1456313c27fab68a67adc92074512b29094eff Mon Sep 17 00:00:00 2001 From: Potato Date: Wed, 5 Nov 2025 19:44:23 +0800 Subject: [PATCH 01/10] Update size parameter to accept callable Allow 'size' parameter to be a callable that returns an int. --- fsspec/callbacks.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fsspec/callbacks.py b/fsspec/callbacks.py index 7ca99ca6a..9354ee1b2 100644 --- a/fsspec/callbacks.py +++ b/fsspec/callbacks.py @@ -91,8 +91,10 @@ def set_size(self, size): Parameters ---------- - size: int + size: int or callable """ + if callable(size): + size = size() self.size = size self.call() From 29e2ae8e633b308ad5b6c15b20f58f57ba58d210 Mon Sep 17 00:00:00 2001 From: OneSizeFitsQuorum Date: Mon, 17 Nov 2025 10:08:21 +0800 Subject: [PATCH 02/10] add test Signed-off-by: OneSizeFitsQuorum --- fsspec/callbacks.py | 19 +++++++++++++++++++ fsspec/tests/test_callbacks.py | 29 +++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+) diff --git a/fsspec/callbacks.py b/fsspec/callbacks.py index 9354ee1b2..0390237e8 100644 --- a/fsspec/callbacks.py +++ b/fsspec/callbacks.py @@ -92,6 +92,25 @@ def set_size(self, size): Parameters ---------- size: int or callable + The total size of the transfer. Can be either: + - An integer representing the total size directly + - A callable (function/method) that returns an integer when invoked + + The callable option is useful when the size is only available as a + method on an object (e.g., filesystem objects that have a ``size()`` + method instead of a ``size`` attribute). + + Examples + -------- + >>> callback = Callback() + >>> callback.set_size(1000) # Direct integer + >>> callback.set_size(lambda: 1000) # Callable returning integer + + Notes + ----- + If a callable is provided, it will be invoked immediately to obtain + the size value. The callable should take no arguments and return an + integer. """ if callable(size): size = size() diff --git a/fsspec/tests/test_callbacks.py b/fsspec/tests/test_callbacks.py index 2cc679d02..c36179aae 100644 --- a/fsspec/tests/test_callbacks.py +++ b/fsspec/tests/test_callbacks.py @@ -73,6 +73,35 @@ def relative_update(self, inc=1): assert events == [1] * 10 +def test_set_size_with_callable(): + """Test that set_size accepts both int and callable parameters.""" + callback = Callback() + + # Test with integer + callback.set_size(100) + assert callback.size == 100 + + # Test with callable (lambda) + callback.set_size(lambda: 200) + assert callback.size == 200 + + # Test with callable (function) + def get_size(): + return 300 + + callback.set_size(get_size) + assert callback.size == 300 + + # Test with callable that simulates a method attribute + class MockFileSystem: + def size(self): + return 400 + + fs = MockFileSystem() + callback.set_size(fs.size) + assert callback.size == 400 + + @pytest.mark.parametrize("tqdm_kwargs", [{}, {"desc": "A custom desc"}]) def test_tqdm_callback(tqdm_kwargs, mocker): pytest.importorskip("tqdm") From 41d05fda5e0a872b621c3e45f7c877851223ecdc Mon Sep 17 00:00:00 2001 From: OneSizeFitsQuorum Date: Mon, 17 Nov 2025 10:31:54 +0800 Subject: [PATCH 03/10] fix Signed-off-by: OneSizeFitsQuorum --- fsspec/implementations/tests/test_arrow.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/fsspec/implementations/tests/test_arrow.py b/fsspec/implementations/tests/test_arrow.py index edf48eb6d..df648b310 100644 --- a/fsspec/implementations/tests/test_arrow.py +++ b/fsspec/implementations/tests/test_arrow.py @@ -280,9 +280,8 @@ def test_get_file_seekable_default(fs, remote_dir, tmp_path): # Test default behavior (seekable=False) local_file = tmp_path / "test_default.txt" - fs.get_file(remote_dir + "/test_file.txt", str(local_file)) - with open(local_file, "rb") as f: - assert f.read() == data + with pytest.raises(OSError, match="only valid on seekable files"): + fs.get_file(remote_dir + "/test_file.txt", str(local_file)) # Test with explicit seekable=True local_file_seekable = tmp_path / "test_seekable.txt" @@ -292,11 +291,10 @@ def test_get_file_seekable_default(fs, remote_dir, tmp_path): # Test with explicit seekable=False local_file_not_seekable = tmp_path / "test_not_seekable.txt" - fs.get_file( - remote_dir + "/test_file.txt", str(local_file_not_seekable), seekable=False - ) - with open(local_file_not_seekable, "rb") as f: - assert f.read() == data + with pytest.raises(OSError, match="only valid on seekable files"): + fs.get_file( + remote_dir + "/test_file.txt", str(local_file_not_seekable), seekable=False + ) def test_cat_file_seekable_override(fs, remote_dir): From 0aa6da56dc404b7323bb575bf5820c38494a2e73 Mon Sep 17 00:00:00 2001 From: OneSizeFitsQuorum Date: Wed, 19 Nov 2025 11:19:45 +0800 Subject: [PATCH 04/10] fix review Signed-off-by: OneSizeFitsQuorum Signed-off-by: OneSizeFitsQuorum --- fsspec/callbacks.py | 23 +---------------------- fsspec/implementations/arrow.py | 4 ++++ fsspec/tests/test_callbacks.py | 29 ----------------------------- 3 files changed, 5 insertions(+), 51 deletions(-) diff --git a/fsspec/callbacks.py b/fsspec/callbacks.py index 0390237e8..7ca99ca6a 100644 --- a/fsspec/callbacks.py +++ b/fsspec/callbacks.py @@ -91,29 +91,8 @@ def set_size(self, size): Parameters ---------- - size: int or callable - The total size of the transfer. Can be either: - - An integer representing the total size directly - - A callable (function/method) that returns an integer when invoked - - The callable option is useful when the size is only available as a - method on an object (e.g., filesystem objects that have a ``size()`` - method instead of a ``size`` attribute). - - Examples - -------- - >>> callback = Callback() - >>> callback.set_size(1000) # Direct integer - >>> callback.set_size(lambda: 1000) # Callable returning integer - - Notes - ----- - If a callable is provided, it will be invoked immediately to obtain - the size value. The callable should take no arguments and return an - integer. + size: int """ - if callable(size): - size = size() self.size = size self.call() diff --git a/fsspec/implementations/arrow.py b/fsspec/implementations/arrow.py index 4c1d7d206..d9126a6f4 100644 --- a/fsspec/implementations/arrow.py +++ b/fsspec/implementations/arrow.py @@ -241,6 +241,10 @@ def __init__(self, fs, stream, path, mode, block_size=None, **kwargs): def __enter__(self): return self + @property + def size(self): + return self.stream.size() + def __exit__(self, *args): return self.close() diff --git a/fsspec/tests/test_callbacks.py b/fsspec/tests/test_callbacks.py index c36179aae..2cc679d02 100644 --- a/fsspec/tests/test_callbacks.py +++ b/fsspec/tests/test_callbacks.py @@ -73,35 +73,6 @@ def relative_update(self, inc=1): assert events == [1] * 10 -def test_set_size_with_callable(): - """Test that set_size accepts both int and callable parameters.""" - callback = Callback() - - # Test with integer - callback.set_size(100) - assert callback.size == 100 - - # Test with callable (lambda) - callback.set_size(lambda: 200) - assert callback.size == 200 - - # Test with callable (function) - def get_size(): - return 300 - - callback.set_size(get_size) - assert callback.size == 300 - - # Test with callable that simulates a method attribute - class MockFileSystem: - def size(self): - return 400 - - fs = MockFileSystem() - callback.set_size(fs.size) - assert callback.size == 400 - - @pytest.mark.parametrize("tqdm_kwargs", [{}, {"desc": "A custom desc"}]) def test_tqdm_callback(tqdm_kwargs, mocker): pytest.importorskip("tqdm") From 0d7a8ffb5d0da905ceb8f2d2cba0d97b57c4f4e1 Mon Sep 17 00:00:00 2001 From: OneSizeFitsQuorum Date: Wed, 19 Nov 2025 21:52:45 +0800 Subject: [PATCH 05/10] fix ut Signed-off-by: OneSizeFitsQuorum --- fsspec/implementations/tests/test_arrow.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/fsspec/implementations/tests/test_arrow.py b/fsspec/implementations/tests/test_arrow.py index df648b310..edf48eb6d 100644 --- a/fsspec/implementations/tests/test_arrow.py +++ b/fsspec/implementations/tests/test_arrow.py @@ -280,8 +280,9 @@ def test_get_file_seekable_default(fs, remote_dir, tmp_path): # Test default behavior (seekable=False) local_file = tmp_path / "test_default.txt" - with pytest.raises(OSError, match="only valid on seekable files"): - fs.get_file(remote_dir + "/test_file.txt", str(local_file)) + fs.get_file(remote_dir + "/test_file.txt", str(local_file)) + with open(local_file, "rb") as f: + assert f.read() == data # Test with explicit seekable=True local_file_seekable = tmp_path / "test_seekable.txt" @@ -291,10 +292,11 @@ def test_get_file_seekable_default(fs, remote_dir, tmp_path): # Test with explicit seekable=False local_file_not_seekable = tmp_path / "test_not_seekable.txt" - with pytest.raises(OSError, match="only valid on seekable files"): - fs.get_file( - remote_dir + "/test_file.txt", str(local_file_not_seekable), seekable=False - ) + fs.get_file( + remote_dir + "/test_file.txt", str(local_file_not_seekable), seekable=False + ) + with open(local_file_not_seekable, "rb") as f: + assert f.read() == data def test_cat_file_seekable_override(fs, remote_dir): From 8cfb3525efc12d989c7054f2f11d051b0a8286cf Mon Sep 17 00:00:00 2001 From: OneSizeFitsQuorum Date: Wed, 19 Nov 2025 22:05:11 +0800 Subject: [PATCH 06/10] test Signed-off-by: OneSizeFitsQuorum --- fsspec/implementations/arrow.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fsspec/implementations/arrow.py b/fsspec/implementations/arrow.py index d9126a6f4..6fc073c0f 100644 --- a/fsspec/implementations/arrow.py +++ b/fsspec/implementations/arrow.py @@ -241,9 +241,9 @@ def __init__(self, fs, stream, path, mode, block_size=None, **kwargs): def __enter__(self): return self - @property - def size(self): - return self.stream.size() + # @property + # def size(self): + # return self.stream.size() def __exit__(self, *args): return self.close() From 3b3906c79531b1da1b966fdce643e4ef9569a24f Mon Sep 17 00:00:00 2001 From: OneSizeFitsQuorum Date: Wed, 19 Nov 2025 22:11:21 +0800 Subject: [PATCH 07/10] add back Signed-off-by: OneSizeFitsQuorum --- fsspec/implementations/arrow.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fsspec/implementations/arrow.py b/fsspec/implementations/arrow.py index 6fc073c0f..d9126a6f4 100644 --- a/fsspec/implementations/arrow.py +++ b/fsspec/implementations/arrow.py @@ -241,9 +241,9 @@ def __init__(self, fs, stream, path, mode, block_size=None, **kwargs): def __enter__(self): return self - # @property - # def size(self): - # return self.stream.size() + @property + def size(self): + return self.stream.size() def __exit__(self, *args): return self.close() From 0e8a80150a0ff00dae444922fa436d0d71a57ca8 Mon Sep 17 00:00:00 2001 From: Potato Date: Thu, 20 Nov 2025 09:35:09 +0800 Subject: [PATCH 08/10] Remove 'size' method from ArrowFile Removed 'size' from the list of supported methods. --- fsspec/implementations/arrow.py | 1 - 1 file changed, 1 deletion(-) diff --git a/fsspec/implementations/arrow.py b/fsspec/implementations/arrow.py index d9126a6f4..2cd6dc6b1 100644 --- a/fsspec/implementations/arrow.py +++ b/fsspec/implementations/arrow.py @@ -223,7 +223,6 @@ def get_file(self, rpath, lpath, **kwargs): "readable", "writable", "close", - "size", "seekable", ], ) From 2f8523fecf89114b34154df0b262acfbc2940c67 Mon Sep 17 00:00:00 2001 From: OneSizeFitsQuorum Date: Thu, 20 Nov 2025 16:44:55 +0800 Subject: [PATCH 09/10] fix ut Signed-off-by: OneSizeFitsQuorum --- fsspec/implementations/tests/test_arrow.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fsspec/implementations/tests/test_arrow.py b/fsspec/implementations/tests/test_arrow.py index edf48eb6d..98d9d0bc2 100644 --- a/fsspec/implementations/tests/test_arrow.py +++ b/fsspec/implementations/tests/test_arrow.py @@ -333,7 +333,7 @@ def test_seekable_true_allows_size_method(fs, remote_dir): with fs.open(test_file, "rb", seekable=True) as f: assert f.seekable() is True # Verify size() method works and returns correct size - file_size = f.size() + file_size = f.size assert file_size == len(data) # Also verify we can read the data assert f.read() == data @@ -353,6 +353,6 @@ def test_seekable_false_prevents_size_method(fs, remote_dir): assert f.seekable() is False # Verify size() raises OSError with pytest.raises(OSError, match="only valid on seekable files"): - f.size() + f.size # Verify we can still read the data assert f.read() == data From 4644bbd824e773aa18a046326cbe2054ccbe7df5 Mon Sep 17 00:00:00 2001 From: OneSizeFitsQuorum Date: Thu, 20 Nov 2025 17:23:06 +0800 Subject: [PATCH 10/10] fix ut Signed-off-by: OneSizeFitsQuorum --- fsspec/implementations/tests/test_arrow.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/fsspec/implementations/tests/test_arrow.py b/fsspec/implementations/tests/test_arrow.py index 98d9d0bc2..e0e3e650a 100644 --- a/fsspec/implementations/tests/test_arrow.py +++ b/fsspec/implementations/tests/test_arrow.py @@ -280,9 +280,8 @@ def test_get_file_seekable_default(fs, remote_dir, tmp_path): # Test default behavior (seekable=False) local_file = tmp_path / "test_default.txt" - fs.get_file(remote_dir + "/test_file.txt", str(local_file)) - with open(local_file, "rb") as f: - assert f.read() == data + with pytest.raises(OSError, match="only valid on seekable files"): + fs.get_file(remote_dir + "/test_file.txt", str(local_file)) # Test with explicit seekable=True local_file_seekable = tmp_path / "test_seekable.txt" @@ -292,11 +291,10 @@ def test_get_file_seekable_default(fs, remote_dir, tmp_path): # Test with explicit seekable=False local_file_not_seekable = tmp_path / "test_not_seekable.txt" - fs.get_file( - remote_dir + "/test_file.txt", str(local_file_not_seekable), seekable=False - ) - with open(local_file_not_seekable, "rb") as f: - assert f.read() == data + with pytest.raises(OSError, match="only valid on seekable files"): + fs.get_file( + remote_dir + "/test_file.txt", str(local_file_not_seekable), seekable=False + ) def test_cat_file_seekable_override(fs, remote_dir): @@ -353,6 +351,6 @@ def test_seekable_false_prevents_size_method(fs, remote_dir): assert f.seekable() is False # Verify size() raises OSError with pytest.raises(OSError, match="only valid on seekable files"): - f.size + _ = f.size # Verify we can still read the data assert f.read() == data