From 78d8ba953c40508af15d516e0b935695d0805e15 Mon Sep 17 00:00:00 2001 From: Anjali Ratnam Date: Tue, 1 Jul 2025 08:34:13 -0700 Subject: [PATCH 1/7] rm_files fix --- adlfs/spec.py | 2 +- adlfs/tests/test_spec.py | 68 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+), 1 deletion(-) diff --git a/adlfs/spec.py b/adlfs/spec.py index 5ff509e1..648fe9cb 100644 --- a/adlfs/spec.py +++ b/adlfs/spec.py @@ -1285,7 +1285,7 @@ async def _rm_files( for file in file_paths: self.invalidate_cache(self._parent(file)) - sync_wrapper(_rm_files) + rm_files = sync_wrapper(_rm_files) async def _separate_directory_markers_for_non_empty_directories( self, file_paths: typing.Iterable[str] diff --git a/adlfs/tests/test_spec.py b/adlfs/tests/test_spec.py index bb5d8f78..827ac9e6 100644 --- a/adlfs/tests/test_spec.py +++ b/adlfs/tests/test_spec.py @@ -12,6 +12,8 @@ import numpy as np import pandas as pd import pytest +from azure.core.exceptions import ResourceNotFoundError +from azure.storage.blob.aio import BlobServiceClient as AIOBlobServiceClient from packaging.version import parse as parse_version from pandas.testing import assert_frame_equal @@ -2210,3 +2212,69 @@ def test_write_max_concurrency(storage, max_concurrency, blob_size, blocksize): with fs.open(path, "rb") as f: assert f.read() == data fs.rm(container_name, recursive=True) + +def test_rm_files(storage): + fs = AzureBlobFileSystem( + account_name=storage.account_name, + connection_string=CONN_STR, + ) + file_list = [ + "top_file.txt", + "root/a/file.txt", + "root/a1/file1.txt", + ] + + fs.rm_files("data", file_list) + for file in file_list: + with pytest.raises(FileNotFoundError): + fs.ls(f"data/{file}") + + +def test_rm_files_nonempty_directory_marker(storage): + fs = AzureBlobFileSystem( + account_name=storage.account_name, + connection_string=CONN_STR, + ) + + with pytest.raises(ResourceNotFoundError): + fs.rm_files("data", ["root/a/"]) + + assert fs.ls("data/root/a/") == ["data/root/a/file.txt"] + + +def test_rm_files_delete_directory_markers(storage, mocker): + mock_container = mocker.AsyncMock() + mock_container.delete_blob = mocker.AsyncMock(return_value=None) + mock_get_container_client = mocker.AsyncMock() + mock_get_container_client.__aenter__.return_value = mock_container + mock_get_container_client.__aexit__.return_value = None + mocker.patch.object( + AIOBlobServiceClient, + "get_container_client", + return_value=mock_get_container_client, + ) + fs = AzureBlobFileSystem( + account_name=storage.account_name, + connection_string=CONN_STR, + ) + + files = [blob.name for blob in storage.get_container_client("data").list_blobs()] + directory_markers = [ + "root/a/", + "root/a1/", + "root/b/", + "root/c/", + "root/d/", + "root/e+f/", + ] + + mocker.patch.object( + fs, + "_separate_directory_markers_for_non_empty_directories", + return_value=(files, directory_markers), + ) + + fs.rm_files("data", files) + expected_calls = [mocker.call(dir) for dir in reversed(directory_markers)] + actual_calls = mock_container.delete_blob.call_args_list[-len(directory_markers) :] + assert actual_calls == expected_calls From b62bb25db95f85445c5d628356afd7cc7adecfa5 Mon Sep 17 00:00:00 2001 From: Anjali Ratnam Date: Wed, 20 Aug 2025 16:53:09 -0700 Subject: [PATCH 2/7] added rm_file --- CHANGELOG.md | 2 +- adlfs/spec.py | 27 ++++++++++++++++- adlfs/tests/test_spec.py | 64 ++++++---------------------------------- 3 files changed, 36 insertions(+), 57 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e917e855..369d2113 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,7 +5,7 @@ Unreleased * Respect `AzureBlobFileSystem.protocol` tuple when removing protocols from fully-qualified paths provided to `AzureBlobFileSystem` methods. - +* Added `AzureBlobFileSystem.rm_file()` 2025.8.0 -------- diff --git a/adlfs/spec.py b/adlfs/spec.py index 648fe9cb..fa274fea 100644 --- a/adlfs/spec.py +++ b/adlfs/spec.py @@ -1285,7 +1285,32 @@ async def _rm_files( for file in file_paths: self.invalidate_cache(self._parent(file)) - rm_files = sync_wrapper(_rm_files) + sync_wrapper(_rm_files) + + async def _rm_file( + self, path: typing.Union[str, typing.List[str]], delimiter: str = "/", **kwargs + ): + """Delete a file. + + Parameters + ---------- + path: str + File to delete. + """ + container_name, p, _ = self.split_path(path, delimiter=delimiter) + try: + if p != "": + await self._rm_files(container_name, [p.rstrip(delimiter)]) + else: + await self._rmdir(container_name) + except ResourceNotFoundError: + pass + except FileNotFoundError: + pass + except Exception as e: + raise RuntimeError("Failed to remove %s for %s", path, e) from e + + rm_file = sync_wrapper(_rm_file) async def _separate_directory_markers_for_non_empty_directories( self, file_paths: typing.Iterable[str] diff --git a/adlfs/tests/test_spec.py b/adlfs/tests/test_spec.py index 827ac9e6..da479254 100644 --- a/adlfs/tests/test_spec.py +++ b/adlfs/tests/test_spec.py @@ -12,8 +12,6 @@ import numpy as np import pandas as pd import pytest -from azure.core.exceptions import ResourceNotFoundError -from azure.storage.blob.aio import BlobServiceClient as AIOBlobServiceClient from packaging.version import parse as parse_version from pandas.testing import assert_frame_equal @@ -2213,68 +2211,24 @@ def test_write_max_concurrency(storage, max_concurrency, blob_size, blocksize): assert f.read() == data fs.rm(container_name, recursive=True) -def test_rm_files(storage): - fs = AzureBlobFileSystem( - account_name=storage.account_name, - connection_string=CONN_STR, - ) - file_list = [ - "top_file.txt", - "root/a/file.txt", - "root/a1/file1.txt", - ] - - fs.rm_files("data", file_list) - for file in file_list: - with pytest.raises(FileNotFoundError): - fs.ls(f"data/{file}") - -def test_rm_files_nonempty_directory_marker(storage): +def test_rm_file(storage): fs = AzureBlobFileSystem( account_name=storage.account_name, connection_string=CONN_STR, ) + path = "data/top_file.txt" - with pytest.raises(ResourceNotFoundError): - fs.rm_files("data", ["root/a/"]) - - assert fs.ls("data/root/a/") == ["data/root/a/file.txt"] + fs.rm_file(path) + with pytest.raises(FileNotFoundError): + fs.ls(path) -def test_rm_files_delete_directory_markers(storage, mocker): - mock_container = mocker.AsyncMock() - mock_container.delete_blob = mocker.AsyncMock(return_value=None) - mock_get_container_client = mocker.AsyncMock() - mock_get_container_client.__aenter__.return_value = mock_container - mock_get_container_client.__aexit__.return_value = None - mocker.patch.object( - AIOBlobServiceClient, - "get_container_client", - return_value=mock_get_container_client, - ) +def test_rm_file_nonempty_directory(storage): fs = AzureBlobFileSystem( account_name=storage.account_name, connection_string=CONN_STR, ) - - files = [blob.name for blob in storage.get_container_client("data").list_blobs()] - directory_markers = [ - "root/a/", - "root/a1/", - "root/b/", - "root/c/", - "root/d/", - "root/e+f/", - ] - - mocker.patch.object( - fs, - "_separate_directory_markers_for_non_empty_directories", - return_value=(files, directory_markers), - ) - - fs.rm_files("data", files) - expected_calls = [mocker.call(dir) for dir in reversed(directory_markers)] - actual_calls = mock_container.delete_blob.call_args_list[-len(directory_markers) :] - assert actual_calls == expected_calls + path = "data/root/a/" + fs.rm_file(path) + assert fs.ls("data/root/a/") == ["data/root/a/file.txt"] From c54a64abf0f8be44dd97ec3e060afacf83925d21 Mon Sep 17 00:00:00 2001 From: Anjali Ratnam Date: Fri, 22 Aug 2025 15:05:00 -0700 Subject: [PATCH 3/7] updates --- adlfs/spec.py | 14 ++++++-------- adlfs/tests/test_spec.py | 10 ---------- 2 files changed, 6 insertions(+), 18 deletions(-) diff --git a/adlfs/spec.py b/adlfs/spec.py index fa274fea..dd3aa62a 100644 --- a/adlfs/spec.py +++ b/adlfs/spec.py @@ -1287,9 +1287,7 @@ async def _rm_files( sync_wrapper(_rm_files) - async def _rm_file( - self, path: typing.Union[str, typing.List[str]], delimiter: str = "/", **kwargs - ): + async def _rm_file(self, path: str, **kwargs): """Delete a file. Parameters @@ -1297,12 +1295,12 @@ async def _rm_file( path: str File to delete. """ - container_name, p, _ = self.split_path(path, delimiter=delimiter) + container_name, p, _ = self.split_path(path) try: - if p != "": - await self._rm_files(container_name, [p.rstrip(delimiter)]) - else: - await self._rmdir(container_name) + async with self.service_client.get_container_client( + container=container_name + ) as cc: + await cc.delete_blob(p) except ResourceNotFoundError: pass except FileNotFoundError: diff --git a/adlfs/tests/test_spec.py b/adlfs/tests/test_spec.py index da479254..9ed1eb5c 100644 --- a/adlfs/tests/test_spec.py +++ b/adlfs/tests/test_spec.py @@ -2222,13 +2222,3 @@ def test_rm_file(storage): fs.rm_file(path) with pytest.raises(FileNotFoundError): fs.ls(path) - - -def test_rm_file_nonempty_directory(storage): - fs = AzureBlobFileSystem( - account_name=storage.account_name, - connection_string=CONN_STR, - ) - path = "data/root/a/" - fs.rm_file(path) - assert fs.ls("data/root/a/") == ["data/root/a/file.txt"] From b8d09f35de507798519797507e686657c9796e18 Mon Sep 17 00:00:00 2001 From: Anjali Ratnam Date: Tue, 9 Sep 2025 16:08:16 -0700 Subject: [PATCH 4/7] updates --- adlfs/spec.py | 7 +------ adlfs/tests/test_spec.py | 6 +++++- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/adlfs/spec.py b/adlfs/spec.py index dd3aa62a..078641a4 100644 --- a/adlfs/spec.py +++ b/adlfs/spec.py @@ -1301,14 +1301,9 @@ async def _rm_file(self, path: str, **kwargs): container=container_name ) as cc: await cc.delete_blob(p) - except ResourceNotFoundError: - pass - except FileNotFoundError: - pass except Exception as e: raise RuntimeError("Failed to remove %s for %s", path, e) from e - - rm_file = sync_wrapper(_rm_file) + self.invalidate_cache(self._parent(path)) async def _separate_directory_markers_for_non_empty_directories( self, file_paths: typing.Iterable[str] diff --git a/adlfs/tests/test_spec.py b/adlfs/tests/test_spec.py index 9ed1eb5c..95b9481c 100644 --- a/adlfs/tests/test_spec.py +++ b/adlfs/tests/test_spec.py @@ -2217,8 +2217,12 @@ def test_rm_file(storage): account_name=storage.account_name, connection_string=CONN_STR, ) - path = "data/top_file.txt" + path = "data/test_file.txt" + with fs.open(path, "wb") as f: + f.write(b"test content") + assert fs.exists(path) fs.rm_file(path) with pytest.raises(FileNotFoundError): fs.ls(path) + assert not fs.exists(path) From cab1c154b38e045c5b1915d520075c2f6b39028d Mon Sep 17 00:00:00 2001 From: Anjali Ratnam Date: Mon, 29 Sep 2025 10:12:23 -0700 Subject: [PATCH 5/7] updates --- adlfs/spec.py | 11 +++++++---- adlfs/tests/test_spec.py | 19 +++++++++++++++++++ 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/adlfs/spec.py b/adlfs/spec.py index 078641a4..6514c385 100644 --- a/adlfs/spec.py +++ b/adlfs/spec.py @@ -1295,14 +1295,17 @@ async def _rm_file(self, path: str, **kwargs): path: str File to delete. """ - container_name, p, _ = self.split_path(path) + container_name, p, version_id = self.split_path(path) try: async with self.service_client.get_container_client( container=container_name ) as cc: - await cc.delete_blob(p) - except Exception as e: - raise RuntimeError("Failed to remove %s for %s", path, e) from e + await cc.delete_blob(p, version_id=version_id) + except ResourceNotFoundError as e: + raise FileNotFoundError( + errno.ENOENT, os.strerror(errno.ENOENT), path + ) from e + self.invalidate_cache(path) self.invalidate_cache(self._parent(path)) async def _separate_directory_markers_for_non_empty_directories( diff --git a/adlfs/tests/test_spec.py b/adlfs/tests/test_spec.py index 95b9481c..c4eb0793 100644 --- a/adlfs/tests/test_spec.py +++ b/adlfs/tests/test_spec.py @@ -2226,3 +2226,22 @@ def test_rm_file(storage): with pytest.raises(FileNotFoundError): fs.ls(path) assert not fs.exists(path) + assert path not in fs.dircache + + +def test_rm_file_versioned_blob(storage): + fs = AzureBlobFileSystem( + account_name=storage.account_name, + connection_string=CONN_STR, + version_aware=True, + ) + path = "data/test_file.txt?versionid=latest" + with fs.open(path, "wb") as f: + f.write(b"test content") + + assert fs.exists(path) + fs.rm_file(path) + with pytest.raises(FileNotFoundError): + fs.ls(path) + assert not fs.exists(path) + assert path not in fs.dircache From 915295c3afb0a8b8eee00b85da07db0a7ca9bbe5 Mon Sep 17 00:00:00 2001 From: Anjali Ratnam Date: Wed, 1 Oct 2025 15:29:55 -0700 Subject: [PATCH 6/7] updates --- adlfs/tests/test_spec.py | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/adlfs/tests/test_spec.py b/adlfs/tests/test_spec.py index c4eb0793..9ba2463b 100644 --- a/adlfs/tests/test_spec.py +++ b/adlfs/tests/test_spec.py @@ -2229,19 +2229,34 @@ def test_rm_file(storage): assert path not in fs.dircache -def test_rm_file_versioned_blob(storage): +def test_rm_file_versioned_blob(storage, mocker): + from azure.storage.blob.aio import ContainerClient + fs = AzureBlobFileSystem( account_name=storage.account_name, connection_string=CONN_STR, version_aware=True, ) - path = "data/test_file.txt?versionid=latest" + + mock_delete_blob = mocker.patch.object( + ContainerClient, "delete_blob", return_value=None + ) + path = f"data/test_file.txt?versionid={DEFAULT_VERSION_ID}" with fs.open(path, "wb") as f: f.write(b"test content") assert fs.exists(path) fs.rm_file(path) + mock_delete_blob.assert_called_once_with( + "test_file.txt", version_id=DEFAULT_VERSION_ID + ) + + +def test_rm_file_does_not_exist(storage): + fs = AzureBlobFileSystem( + account_name=storage.account_name, + connection_string=CONN_STR, + ) + path = "data/non_existent_file.txt" with pytest.raises(FileNotFoundError): - fs.ls(path) - assert not fs.exists(path) - assert path not in fs.dircache + fs.rm_file(path) From ef84f9730b75151555abc2beeeb22d98de52b7ce Mon Sep 17 00:00:00 2001 From: Anjali Ratnam Date: Wed, 1 Oct 2025 15:58:16 -0700 Subject: [PATCH 7/7] updates --- adlfs/tests/test_spec.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/adlfs/tests/test_spec.py b/adlfs/tests/test_spec.py index 9ba2463b..df49c55f 100644 --- a/adlfs/tests/test_spec.py +++ b/adlfs/tests/test_spec.py @@ -2237,15 +2237,10 @@ def test_rm_file_versioned_blob(storage, mocker): connection_string=CONN_STR, version_aware=True, ) - mock_delete_blob = mocker.patch.object( ContainerClient, "delete_blob", return_value=None ) path = f"data/test_file.txt?versionid={DEFAULT_VERSION_ID}" - with fs.open(path, "wb") as f: - f.write(b"test content") - - assert fs.exists(path) fs.rm_file(path) mock_delete_blob.assert_called_once_with( "test_file.txt", version_id=DEFAULT_VERSION_ID