1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -10,6 +10,7 @@ Unreleased
- The block size is now used for partitioned uploads. Previously, 1 GiB was used for each uploaded block irrespective of the block size
- Updated default block size to be 50 MiB. Set `blocksize` for `AzureBlobFileSystem` or `block_size` when opening `AzureBlobFile` to revert back to 5 MiB default.
- `AzureBlobFile` now inherits the block size from `AzureBlobFileSystem` when fs.open() is called and a block_size is not passed in.
- Added concurrency for `_async_upload_chunk`. Can be set using `max_concurrency` for `AzureBlobFileSystem`.

Collaborator:
For the changelog entry, let's avoid mentioning private methods and instead word it from the perspective of someone consuming adlfs. For example:

Introduce concurrent uploads for large `AzureBlobFileSystem.write()` calls. Maximum concurrency can be set using `max_concurrency` for `AzureBlobFileSystem`.



2024.12.0
36 changes: 25 additions & 11 deletions adlfs/spec.py
@@ -2156,6 +2156,15 @@ def _get_chunks(self, data):
        yield data[start:end]
        start = end

async def _upload(self, chunk, block_id, semaphore):

Collaborator:
Let's rename this method to `_stage_block` so it's more descriptive of the underlying logic.

    async with semaphore:
        async with self.container_client.get_blob_client(blob=self.blob) as bc:
            await bc.stage_block(
                block_id=block_id,
                data=chunk,
                length=len(chunk),
            )

async def _async_upload_chunk(self, final: bool = False, **kwargs):
"""
Write one part of a multi-block file upload
@@ -2175,17 +2184,22 @@ async def _async_upload_chunk(self, final: bool = False, **kwargs):
commit_kw["headers"] = {"If-None-Match": "*"}
if self.mode in {"wb", "xb"}:
try:
            for chunk in self._get_chunks(data):
                async with self.container_client.get_blob_client(
                    blob=self.blob
                ) as bc:
                    await bc.stage_block(
                        block_id=block_id,
                        data=chunk,
                        length=len(chunk),
                    )
                self._block_list.append(block_id)
                block_id = self._get_block_id(self._block_list)
            max_concurrency = self.fs.max_concurrency

Collaborator:
I think we should make sure to check whether `max_concurrency` is None here. It looks like it can end up resolving to None based on the logic in `__init__`. We should be able to handle that by defaulting to 1 when it is not set, as sketched below.
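
A minimal sketch of that guard, assuming it sits right where `max_concurrency` is resolved (illustrative only, not the PR's code):

```python
# Illustrative only: fall back to a concurrency of 1 when the filesystem
# did not resolve max_concurrency (i.e. it is None).
max_concurrency = self.fs.max_concurrency or 1
semaphore = asyncio.Semaphore(max_concurrency)
```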

            semaphore = asyncio.Semaphore(max_concurrency)

Member:
FYI: this effectively makes a "pool", whereas fsspec upstream has a run-coroutines-in-chunks ability. The latter is less efficient but easier to reason about (a rough sketch of it follows below). It may be worthwhile upstreaming this at some point. See a similar idea in fsspec/filesystem_spec#1908.
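
For comparison, a rough sketch of the run-in-chunks alternative mentioned above (hypothetical `batch_size`, not fsspec's actual helper):

```python
# Rough sketch of the "run coroutines in chunks" idea: gather a fixed-size
# batch at a time instead of letting a semaphore pool schedule everything.
batch_size = 4  # hypothetical batch size
for i in range(0, len(tasks), batch_size):
    await asyncio.gather(*tasks[i : i + batch_size])
```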

Collaborator:
Upstreaming makes sense to me. We originally did not look upstream in fsspec core to see if there was anything to leverage, but I think generalizing this semaphore approach upstream would definitely be useful to enable concurrency in more places without needing to duplicate the logic.

            tasks = []
            block_ids = self._block_list or []
            start_idx = len(block_ids)
            chunks = list(self._get_chunks(data))

Collaborator:
I'm still hesitant about exhausting the entire _get_chunks() iterator here, as we'd be pulling the entire buffer in again as chunks in this list instead of lazily iterating through it. Could we instead:

  1. Make sure we are using memoryviews on the data so that the chunks are not necessarily copied on slicing.
  2. Refactor _get_chunks to yield start and end boundaries instead of byte chunks, similar to how s3fs does it. We'd then update our internal _stage_block method to slice from the data buffer once it has acquired its semaphore.

I like this approach because the memoryview will help reduce unnecessary copies in adlfs code, and the semaphore will protect us from loading more chunks than there are tasks available to run concurrently.
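
A rough sketch of that refactor, using hypothetical helper names (`_get_chunk_bounds`) while keeping the rest of the PR's structure:

```python
# Sketch of the suggestion: yield (start, end) offsets lazily and slice a
# memoryview only after the semaphore has been acquired, avoiding extra copies.
def _get_chunk_bounds(self, data):
    start = 0
    while start < len(data):
        end = min(start + self.blocksize, len(data))
        yield start, end
        start = end

async def _stage_block(self, data, start, end, block_id, semaphore):
    async with semaphore:
        chunk = memoryview(data)[start:end]  # no copy of the underlying bytes
        async with self.container_client.get_blob_client(blob=self.blob) as bc:
            await bc.stage_block(block_id=block_id, data=chunk, length=len(chunk))
```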

            for _ in range(len(chunks)):
                block_ids.append(block_id)
                block_id = self._get_block_id(block_ids)

            if chunks:
                self._block_list = block_ids

Collaborator:
Instead of adding the block_id upfront before the coroutines run, could we instead return the block_id from _stage_block() and extend the block_list from the results of asyncio.gather()? I noticed this pattern in s3fs when organizing ETags and think it can help here too.

I mainly suggest this because:

  1. It simplifies the logic to the point that I think we can just reuse the existing for loop from before this change.
  2. It adds the property that any block ids in the block_list have been successfully uploaded. It's hard to tell if a downstream adlfs user will run into this, but if they are catching exceptions as part of write() and still commit the block list, the commit could fail because there will be non-existent blocks in that list.
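
For illustration, the suggested shape might look roughly like this (a sketch, not the PR's code):

```python
# Sketch: _stage_block returns its block_id, so only blocks that were
# successfully staged ever end up in self._block_list.
async def _stage_block(self, chunk, block_id, semaphore):
    async with semaphore:
        async with self.container_client.get_blob_client(blob=self.blob) as bc:
            await bc.stage_block(block_id=block_id, data=chunk, length=len(chunk))
    return block_id

# Back in _async_upload_chunk:
staged = await asyncio.gather(*tasks)
self._block_list.extend(staged)
```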

            for chunk, block_id in zip(chunks, block_ids[start_idx:]):
                tasks.append(self._upload(chunk, block_id, semaphore))

            await asyncio.gather(*tasks)

Collaborator:
One thing that would be interesting to check (maybe tricky to reproduce) is what the error looks like if one of the stage blocks fails, resulting in cancellations of in-flight stage blocks. Supposedly a CancelledError will be raised in in-progress coroutines, so I'm curious how/if it gets propagated correctly (e.g. we do not want cancellation errors to show up and mask the initial underlying error).

Collaborator (author):
It looks like as long as return_exceptions for asyncio.gather() is False (which is the default), the actual error shows up. Otherwise, the error just gets collected as part of the results list, and we eventually get the invalid block list error when it tries to commit the block list.
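
A small standalone illustration of that behavior (plain asyncio, not adlfs code):

```python
import asyncio


async def stage(i):
    # Simulate one failing block upload among several concurrent ones.
    if i == 2:
        raise ValueError("stage_block failed")
    await asyncio.sleep(0.01)
    return i


async def main():
    try:
        # Default return_exceptions=False: the original error propagates.
        await asyncio.gather(*(stage(i) for i in range(4)))
    except ValueError as exc:
        print("propagated:", exc)

    # return_exceptions=True: the error is just an item in the results list,
    # so the first visible failure would be the later commit of the block list.
    results = await asyncio.gather(*(stage(i) for i in range(4)), return_exceptions=True)
    print(results)


asyncio.run(main())
```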


            if final:
                block_list = [BlobBlock(_id) for _id in self._block_list]
28 changes: 28 additions & 0 deletions adlfs/tests/test_spec.py
@@ -2140,3 +2140,31 @@ def test_blobfile_default_blocksize(storage):
"data/root/a/file.txt",
)
assert f.blocksize == 50 * 2**20


@pytest.mark.parametrize(
    "max_concurrency, blob_size",
    [
        (1, 51 * 2**20),
        (4, 200 * 2**20),
        (4, 49 * 2**20),
    ],

Collaborator:
These are some good initial cases. I'm not sure how long these tests currently take, but it would be interesting to add cases that set the blocksize to 5 MiB with a total blob size of 200 MiB, and then:

  1. Use the default max_concurrency (set it to None) to make sure the default settings work out of the box.
  2. Use a concurrency of 4 to test the scenario where concurrency gets saturated and has to wait.

I also like setting the block size lower for some of these cases to better stress the system and help catch any concurrency issues that could arise in the future.

)
def test_max_concurrency(storage, max_concurrency, blob_size):

Collaborator:
Let's rename it to `test_write_max_concurrency` to better indicate that we are only testing concurrency for writes.

    fs = AzureBlobFileSystem(
        account_name=storage.account_name,
        connection_string=CONN_STR,
        max_concurrency=max_concurrency,
    )
    data = os.urandom(blob_size)
    fs.mkdir("large-file-container")

Collaborator:
It would probably be worth using a randomized name for the container so that we are uploading a new blob each time and can guarantee there is no state left over from a previous parameterized case. We should also make sure to delete the container at the end of the test to avoid leaving state behind.

Another thing we should consider eventually doing (though it might require a decent amount of scaffolding) is introducing a pytest fixture that automatically handles the creation and cleanup of containers. It seems like there are a fair number of cases that could leverage this, so it probably makes sense to do as a follow-up PR. A rough sketch of such a fixture is below.
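
A minimal sketch of what such a fixture could look like, reusing the test module's `storage` fixture and `CONN_STR` (the fixture name and cleanup call are illustrative):

```python
import uuid

import pytest


@pytest.fixture
def container(storage):
    # Create a uniquely named container per test and remove it afterwards so
    # no state leaks between parameterized cases.
    fs = AzureBlobFileSystem(
        account_name=storage.account_name,
        connection_string=CONN_STR,
    )
    name = f"test-container-{uuid.uuid4().hex}"
    fs.mkdir(name)
    try:
        yield name
    finally:
        fs.rm(name, recursive=True)
```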

path = "large-file-container/blob.txt"

with fs.open(path, "wb") as f:
f.write(data)

assert fs.exists(path)
assert fs.size(path) == blob_size

with fs.open(path, "rb") as f:
assert f.read() == data