diff --git a/HISTORY.md b/HISTORY.md index d6cfd234..1fc37019 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,5 +1,9 @@ # cloudpathlib Changelog +## Unreleased + +- Fixed `rmtree` failing on Azure without `hns` when there are more than 256 blobs to delete + ## v0.21.0 (2025-03-03) - Removed support for deprecated env var that had a typo (`CLOUPATHLIB_FILE_CACHE_MODE`; you should use `CLOUDPATHLIB_FILE_CACHE_MODE`). diff --git a/cloudpathlib/azure/azblobclient.py b/cloudpathlib/azure/azblobclient.py index e4b54257..56031ed9 100644 --- a/cloudpathlib/azure/azblobclient.py +++ b/cloudpathlib/azure/azblobclient.py @@ -4,6 +4,7 @@ from http import HTTPStatus from pathlib import Path from typing import Any, Callable, Dict, Iterable, Optional, Tuple, Union +from itertools import islice try: from typing import cast @@ -437,11 +438,12 @@ def _remove(self, cloud_path: AzureBlobPath, missing_ok: bool = True) -> None: _hns_rmtree(self.data_lake_client, cloud_path.container, cloud_path.blob) return - blobs = [ + blobs = ( b.blob for b, is_dir in self._list_dir(cloud_path, recursive=True) if not is_dir - ] + ) container_client = self.service_client.get_container_client(cloud_path.container) - container_client.delete_blobs(*blobs) + while batch := tuple(islice(blobs, 256)): + container_client.delete_blobs(*batch) elif file_or_dir == "file": blob = self.service_client.get_blob_client( container=cloud_path.container, blob=cloud_path.blob diff --git a/tests/test_azure_specific.py b/tests/test_azure_specific.py index 78b53bf3..142730b4 100644 --- a/tests/test_azure_specific.py +++ b/tests/test_azure_specific.py @@ -206,3 +206,13 @@ def test_adls_gen2_rename(azure_gen2_rig): p2 = p.rename(azure_gen2_rig.create_cloud_path("dir2")) assert not p.exists() assert p2.exists() + + +def test_batched_rmtree_no_hns(azure_rig): + p = azure_rig.create_cloud_path("new_dir") + + p.mkdir() + for i in range(400): + (p / f"{i}.txt").write_text("content") + p.rmtree() + assert not p.exists()