Merged
Changes from 2 commits
13 changes: 7 additions & 6 deletions adlfs/spec.py
@@ -69,7 +69,7 @@
"is_current_version",
]
_ROOT_PATH = "/"
_DEFAULT_BLOCK_SIZE = 4 * 1024 * 1024
_DEFAULT_BLOCK_SIZE = 50 * 2**20
Collaborator:
We should also update the changelog in this PR. I'm thinking we can reuse the three bullet points from my earlier comment (#509 (comment)) and make the wording a bit more succinct.


_SOCKET_TIMEOUT_DEFAULT = object()

@@ -177,8 +177,7 @@ class AzureBlobFileSystem(AsyncFileSystem):
        The credentials with which to authenticate. Optional if the account URL already has a SAS token.
        Can include an instance of TokenCredential class from azure.identity.aio.
    blocksize: int
-        The block size to use for download/upload operations. Defaults to hardcoded value of
-        ``BlockBlobService.MAX_BLOCK_SIZE``
+        The block size to use for download/upload operations. Defaults to 50 MiB.
    client_id: str
        Client ID to use when authenticating using an AD Service Principal client/secret.
    client_secret: str
@@ -1879,6 +1878,8 @@ def _open(
        is versioning aware and blob versioning is enabled on the relevant container.
        """
        logger.debug(f"_open: {path}")
+        if block_size is None:
Collaborator:
We should make sure to update the block_size docstring for this _open() method to:

  1. Include "uploads" in the wording. It looks like it currently only refers to downloads.
  2. Say that the parameter is an override that, when not provided, defaults to the blocksize on the file system.
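Something along these lines as a starting point (wording is only a suggestion):

```python
block_size: int, optional
    Override for the block size (in bytes) used for uploads and
    downloads of this file. When not provided (or None), defaults
    to the blocksize configured on the file system instance.
```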

+            block_size = self.blocksize
        if not self.version_aware and version_id:
            raise ValueError(
                "version_id cannot be specified if the filesystem "
@@ -1901,7 +1902,7 @@
class AzureBlobFile(AbstractBufferedFile):
    """File-like operations on Azure Blobs"""

-    DEFAULT_BLOCK_SIZE = 5 * 2**20
+    DEFAULT_BLOCK_SIZE = _DEFAULT_BLOCK_SIZE

    def __init__(
        self,
@@ -2146,7 +2147,7 @@ async def _async_initiate_upload(self, **kwargs):

    _initiate_upload = sync_wrapper(_async_initiate_upload)

-    def _get_chunks(self, data, chunk_size=1024**3):  # Keeping the chunk size as 1 GB
+    def _get_chunks(self, data, chunk_size):
Collaborator:
Looking through this more, it probably makes sense to remove the chunk_size parameter from this helper method altogether and replace the sole reference to chunk_size with the instance property self.blocksize. We are not really overriding this value anymore, and the block size is already set in the constructor, so this keeps things simpler.
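i.e., something like this (an untested sketch; the slicing in the loop body reconstructs the elided lines):

```python
    def _get_chunks(self, data):
        # Yield successive self.blocksize-sized chunks; the final chunk
        # may be shorter when the data does not end on a block boundary.
        start = 0
        length = len(data)
        while start < length:
            end = min(start + self.blocksize, length)
            yield data[start:end]
            start = end
```

The call site in _async_upload_chunk would then go back to self._get_chunks(data).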

        start = 0
        length = len(data)
        while start < length:
@@ -2173,7 +2174,7 @@ async def _async_upload_chunk(self, final: bool = False, **kwargs):
            commit_kw["headers"] = {"If-None-Match": "*"}
        if self.mode in {"wb", "xb"}:
            try:
-                for chunk in self._get_chunks(data):
+                for chunk in self._get_chunks(data, self.blocksize):
                    async with self.container_client.get_blob_client(
                        blob=self.blob
                    ) as bc:
36 changes: 36 additions & 0 deletions adlfs/tests/test_spec.py
@@ -1,7 +1,9 @@
import datetime
+import math
import os
import tempfile
from unittest import mock
+from unittest.mock import patch

import azure.storage.blob.aio
import dask.dataframe as dd
@@ -2045,3 +2047,37 @@ def test_open_file_x(storage: azure.storage.blob.BlobServiceClient, tmpdir):
with fs.open("data/afile", "xb") as f:
pass
assert fs.cat_file("data/afile") == b"data"


@pytest.mark.parametrize("blocksize", [5 * 2**20, 50 * 2**20, 100 * 2**20])
Collaborator:
I think we should be fine just making this a non-parameterized test; I'm not sure there is much value in setting different block sizes. I'd say we set the blocksize to a few MiB and write() data that requires several blocks, making sure the last block only partially fills a block (i.e., is less than blocksize) so the logic is exercised for sizes that do not fall on block boundaries.

This should simplify the scaffolding for the case and also take less time to run.
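A rough sketch of what I have in mind (untested; it reuses the storage/mocker fixtures and CONN_STR from this module, and the exact mock setup is illustrative):

```python
def test_number_of_blocks(storage, mocker):
    blocksize = 2 * 2**20  # a small block size keeps the test fast
    fs = AzureBlobFileSystem(
        account_name=storage.account_name,
        connection_string=CONN_STR,
        blocksize=blocksize,
    )
    # Three full blocks plus a short trailing block, so the data does
    # not fit evenly on block boundaries.
    content = b"1" * (3 * blocksize + 123)

    mocker.patch(
        "azure.storage.blob.aio.BlobClient.commit_block_list", autospec=True
    )
    mock_stage_block = mocker.patch(
        "azure.storage.blob.aio.BlobClient.stage_block", autospec=True
    )
    with fs.open("data/root/a/file.txt", "wb") as f:
        f.write(content)

    assert mock_stage_block.call_count == math.ceil(len(content) / blocksize)
```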

def test_number_of_blocks(storage, mocker, blocksize):

    fs = AzureBlobFileSystem(
        account_name=storage.account_name,
        connection_string=CONN_STR,
        blocksize=blocksize,
    )

    content = b"1" * (blocksize * 2 + 1)
    with fs.open("data/root/a/file.txt", "wb", blocksize=blocksize) as f:
Collaborator:
Let's remove blocksize from this call. I'm not sure it affects anything, since the argument fs.open() actually takes is block_size, and setting it on the file system should suffice.

        mocker.patch(
            "azure.storage.blob.aio.BlobClient.commit_block_list", autospec=True
        )
        with patch(
            "azure.storage.blob.aio.BlobClient.stage_block", autospec=True
        ) as mock_stage_block:
Collaborator:
For these patch statements, is there a particular reason why:

  1. mocker.patch is not being used for both?
  2. We are not following the pattern from other test cases, where we import the BlobClient first (e.g., here) and patch it directly?
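For reference, the pattern I mean would look something like this (assuming pytest-mock's mocker fixture for both patches):

```python
from azure.storage.blob.aio import BlobClient

# Patch the class methods directly rather than via dotted string paths.
mocker.patch.object(BlobClient, "commit_block_list", autospec=True)
mock_stage_block = mocker.patch.object(BlobClient, "stage_block", autospec=True)
```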

            f.write(content)
    expected_blocks = math.ceil(len(content) / blocksize)
    actual_blocks = mock_stage_block.call_count
Collaborator:
It would also probably be worth asserting the actual data sizes used in each stage_block call.
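Something like this (a sketch; whether the payload shows up positionally or as the data keyword depends on how adlfs invokes stage_block, so adjust accordingly):

```python
staged_sizes = [
    len(call.kwargs["data"] if "data" in call.kwargs else call.args[-1])
    for call in mock_stage_block.call_args_list
]
# Every staged block except possibly the last should be exactly blocksize.
assert staged_sizes[:-1] == [blocksize] * (len(staged_sizes) - 1)
assert 0 < staged_sizes[-1] <= blocksize
```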

    assert actual_blocks == expected_blocks
Collaborator:
Since we are patching the commit block list call as well, it may be worth also asserting that the number of blocks in that call matches the expected number of blocks.
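e.g., something like this, assuming the commit patch is bound to a name such as mock_commit_block_list (hypothetical here):

```python
# The block list arrives either positionally or as the block_list kwarg.
(commit_call,) = mock_commit_block_list.call_args_list
block_list = (
    commit_call.kwargs["block_list"]
    if "block_list" in commit_call.kwargs
    else commit_call.args[-1]
)
assert len(block_list) == expected_blocks
```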



def test_block_size(storage):
Collaborator:
It might be worth parameterizing this one to test the different permutations of setting/omitting block sizes. To do this, we parameterize over the input block size for both the file system and the fs.open() call, along with the expected block size for both the file system and the file object. We'd then add cases such as (feel free to adjust these or add more):

  1. Assert defaults when block size is not set for either the file system or file object
  2. Assert file system block size propagates to the file-like object
  3. Assert that we can override the block_size for the fs.open() call
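Roughly (an untested sketch; it assumes _DEFAULT_BLOCK_SIZE is importable from adlfs.spec, and the values are placeholders):

```python
@pytest.mark.parametrize(
    "fs_blocksize, open_block_size, expected",
    [
        (None, None, _DEFAULT_BLOCK_SIZE),  # 1. defaults everywhere
        (7 * 2**20, None, 7 * 2**20),       # 2. fs blocksize propagates
        (7 * 2**20, 3 * 2**20, 3 * 2**20),  # 3. open() override wins
    ],
)
def test_block_size(storage, fs_blocksize, open_block_size, expected):
    fs_kwargs = {} if fs_blocksize is None else {"blocksize": fs_blocksize}
    fs = AzureBlobFileSystem(
        account_name=storage.account_name,
        connection_string=CONN_STR,
        **fs_kwargs,
    )
    # block_size=None should fall back to the file system's blocksize.
    with fs.open("data/root/a/file.txt", "wb", block_size=open_block_size) as f:
        assert f.blocksize == expected
```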

    fs = AzureBlobFileSystem(
        account_name=storage.account_name,
        connection_string=CONN_STR,
        blocksize=5 * 2**20,
Collaborator:
When implementing cases where we override the block size, it may be worth using values that are unlikely to be defaults (e.g., 7 * 2**20) to make it clearer that this is an override value as opposed to a possible default.

    )

    with fs.open("data/root/a/file.txt", "wb") as f:
        assert f.blocksize == 5 * 2**20
Collaborator:
We should probably add an additional parameterized test where we set block_size directly via the AzureBlobFile constructor, with cases that:

  1. Assert the default block size both when block_size is omitted and when it is None.
  2. Assert the block size is not inherited from the passed-in fs. This one may make sense as a separate test method if it does not parameterize cleanly.
  3. Assert that we can override block_size.
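A possible shape for that test (an untested sketch; it assumes the AzureBlobFile constructor accepts block_size the way fsspec's AbstractBufferedFile does, and elides any setup the constructor may need for the blob itself):

```python
@pytest.mark.parametrize(
    "file_kwargs, expected",
    [
        ({}, _DEFAULT_BLOCK_SIZE),                    # 1. omitted
        ({"block_size": None}, _DEFAULT_BLOCK_SIZE),  # 1. explicit None
        ({"block_size": 7 * 2**20}, 7 * 2**20),       # 3. override
    ],
)
def test_azure_blob_file_block_size(storage, file_kwargs, expected):
    fs = AzureBlobFileSystem(
        account_name=storage.account_name,
        connection_string=CONN_STR,
        blocksize=9 * 2**20,  # 2. should not be inherited by the file object
    )
    f = AzureBlobFile(fs, "data/root/a/file.txt", mode="wb", **file_kwargs)
    assert f.blocksize == expected
```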