4 changes: 2 additions & 2 deletions CHANGELOG.md
@@ -7,8 +7,8 @@ Unreleased
- Fix issue where ``AzureBlobFile`` did not respect ``location_mode`` parameter
  from parent ``AzureBlobFileSystem`` when using SAS credentials and connecting to
  new SDK clients.
-- The block size is now used for uploads. Previously, it was always 1 GiB irrespective of the block size
-- Updated default block size to be 50 MiB
+- The block size is now used for partitioned uploads. Previously, 1 GiB was used for each uploaded block irrespective of the block size.
+- Updated default block size to be 50 MiB. Set `blocksize` for `AzureBlobFileSystem` or `block_size` when opening `AzureBlobFile` to revert to the 5 MiB default.
- `AzureBlobFile` now inherits the block size from `AzureBlobFileSystem` when fs.open() is called and a block_size is not passed in.


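A minimal sketch of pinning the previous 5 MiB block size after this change, using only the `blocksize`/`block_size` parameters the changelog entries above name; the account name, connection string, and path are placeholders:

```python
# A sketch, not adlfs documentation: pins the pre-change 5 MiB block size
# using the parameters named in the changelog entries above. Account,
# credentials, and paths below are placeholders.
from adlfs import AzureBlobFileSystem

# Filesystem-wide default for files later opened through this instance.
fs = AzureBlobFileSystem(
    account_name="myaccount",      # placeholder
    connection_string="...",       # placeholder credentials
    blocksize=5 * 2**20,           # 5 MiB, the previous default
)

# Per-file override: block_size passed to open() takes precedence
# over the filesystem-level blocksize.
with fs.open("container/path/file.bin", "wb", block_size=5 * 2**20) as f:
    f.write(b"data")
```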
30 changes: 18 additions & 12 deletions adlfs/tests/test_spec.py
@@ -2048,7 +2048,7 @@ def test_open_file_x(storage: azure.storage.blob.BlobServiceClient, tmpdir):
    assert fs.cat_file("data/afile") == b"data"


-def test_number_of_blocks(storage, mocker):
+def test_uses_block_size_for_partitioned_uploads(storage, mocker):
    from azure.storage.blob.aio import BlobClient

    blocksize = 5 * 2**20
@@ -2075,15 +2075,21 @@ def test_number_of_blocks(storage, mocker):


@pytest.mark.parametrize(
-    "filesystem_blocksize, file_blocksize, expected_blocksize",
+    "filesystem_blocksize, file_blocksize, expected_blocksize, expected_filesystem_blocksize",
    [
-        (None, None, 50 * 2**20),
-        (50 * 2**20, None, 50 * 2**20),
-        (None, 5 * 2**20, 5 * 2**20),
-        (50 * 2**20, 7 * 2**20, 7 * 2**20),
+        (None, None, 50 * 2**20, None),
@kyleknap (Collaborator) commented on Aug 15, 2025:

Instead of directly passing None, let's just treat it as omitting block_size/blocksize in the constructor/open call. Mainly looking at the expected_filesystem_blocksize, I'm not sure that asserting that the expected filesystem blocksize is None is what we want when we are mainly checking that it falls back to the default block size of 50 * 2**20.
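A minimal sketch of the omission-based parametrization this comment describes, assuming the `storage` fixture and `CONN_STR` constant used elsewhere in this file; the test name and kwargs-building pattern are hypothetical:

```python
# Sketch: treat None as "omit the argument entirely" rather than passing it.
import pytest

from adlfs import AzureBlobFileSystem


@pytest.mark.parametrize(
    "filesystem_blocksize, file_blocksize, expected_blocksize",
    [
        (None, None, 50 * 2**20),      # both omitted -> default applies
        (7 * 2**20, None, 7 * 2**20),  # file inherits the filesystem blocksize
        (None, 5 * 2**20, 5 * 2**20),  # explicit block_size on open()
    ],
)
def test_block_size_via_omission(
    storage, filesystem_blocksize, file_blocksize, expected_blocksize
):
    # Only include the keyword when a value was parametrized, so None
    # means the argument was never passed at all.
    fs_kwargs = {"account_name": storage.account_name, "connection_string": CONN_STR}
    if filesystem_blocksize is not None:
        fs_kwargs["blocksize"] = filesystem_blocksize
    open_kwargs = {"mode": "wb"}
    if file_blocksize is not None:
        open_kwargs["block_size"] = file_blocksize

    fs = AzureBlobFileSystem(**fs_kwargs)
    with fs.open("data/root/a/file.txt", **open_kwargs) as f:
        assert f.blocksize == expected_blocksize
```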

+        (7 * 2**20, None, 7 * 2**20, 7 * 2**20),
+        (None, 5 * 2**20, 5 * 2**20, None),
+        (40 * 2**20, 7 * 2**20, 7 * 2**20, 40 * 2**20),
    ],
)
-def test_block_size(storage, filesystem_blocksize, file_blocksize, expected_blocksize):
+def test_block_size(
+    storage,
+    filesystem_blocksize,
+    file_blocksize,
+    expected_blocksize,
+    expected_filesystem_blocksize,
+):
    fs = AzureBlobFileSystem(
        account_name=storage.account_name,
        connection_string=CONN_STR,
@@ -2092,6 +2098,7 @@ def test_block_size(storage, filesystem_blocksize, file_blocksize, expected_bloc

    with fs.open("data/root/a/file.txt", "wb", block_size=file_blocksize) as f:
        assert f.blocksize == expected_blocksize
+    assert fs.blocksize == expected_filesystem_blocksize


@pytest.mark.parametrize(
@@ -2111,17 +2118,16 @@ def test_blocksize_from_blobfile(storage, file_blocksize, expected_blocksize):
        block_size=file_blocksize,
    )
    assert f.blocksize == expected_blocksize
-    assert fs.blocksize == 50 * 2**20


-def test_override_blocksize(storage):
+def test_blobfile_default_blocksize(storage):
    fs = AzureBlobFileSystem(
-        account_name=storage.account_name, connection_string=CONN_STR
+        account_name=storage.account_name,
+        connection_string=CONN_STR,
+        blocksize=20 * 2**20,
    )
    f = AzureBlobFile(
        fs,
        "data/root/a/file.txt",
    )
    assert f.blocksize == 50 * 2**20
-    f.blocksize = 2 * 2**20
-    assert f.blocksize == 2 * 2**20