Skip to content

Commit 4c3c633

Browse files
[Storage] Refactor Blob download to address Unicode issues (#35740)
1 parent: 5da0dbe · commit: 4c3c633

File tree

12 files changed

+1063
-447
lines changed

12 files changed

+1063
-447
lines changed

sdk/storage/azure-storage-blob/CHANGELOG.md

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,9 @@
33
## 12.21.0 (Unreleased)
44

55
### Features Added
6-
6+
- Added new `chars` keyword to the `StorageStreamDownloader.read` method to support reading an arbitrary number of
7+
characters from the stream rather than bytes. This can only be used when `encoding` is specified on `download_blob`
8+
but can help prevent decoding errors in certain scenarios.
79

810
## 12.21.0b1 (2024-06-11)
911

sdk/storage/azure-storage-blob/assets.json

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,5 +2,5 @@
22
"AssetsRepo": "Azure/azure-sdk-assets",
33
"AssetsRepoPrefixPath": "python",
44
"TagPrefix": "python/storage/azure-storage-blob",
5-
"Tag": "python/storage/azure-storage-blob_275000b78a"
5+
"Tag": "python/storage/azure-storage-blob_4bb162f320"
66
}

sdk/storage/azure-storage-blob/azure/storage/blob/_download.py

Lines changed: 277 additions & 167 deletions
Large diffs are not rendered by default.

sdk/storage/azure-storage-blob/azure/storage/blob/aio/_download_async.py

Lines changed: 317 additions & 209 deletions
Large diffs are not rendered by default.

sdk/storage/azure-storage-blob/tests/test_blob_encryption_async.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -859,6 +859,8 @@ async def test_get_blob_read_with_other_read_operations_ranged(self, **kwargs):
859859
await self._setup(storage_account_name, storage_account_key)
860860
self.bsc.require_encryption = True
861861
self.bsc.key_encryption_key = KeyWrapper('key1')
862+
self.bsc._config.max_single_get_size = 1024
863+
self.bsc._config.max_chunk_get_size = 1024
862864

863865
data = b'12345' * 205 * 10 # 10250 bytes
864866
blob = self.bsc.get_blob_client(self.container_name, self._get_blob_reference(BlobType.BLOCKBLOB))

sdk/storage/azure-storage-blob/tests/test_blob_encryption_v2.py

Lines changed: 35 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1098,6 +1098,41 @@ def test_get_blob_read_with_other_read_operations_ranged(self, **kwargs):
10981098
assert second == data[offset + read_size:offset + length]
10991099
assert read_length == len(second)
11001100

1101+
@pytest.mark.live_test_only
1102+
@BlobPreparer()
1103+
def test_get_blob_using_read_chars(self, **kwargs):
1104+
storage_account_name = kwargs.pop("storage_account_name")
1105+
storage_account_key = kwargs.pop("storage_account_key")
1106+
1107+
self._setup(storage_account_name, storage_account_key)
1108+
kek = KeyWrapper('key1')
1109+
bsc = BlobServiceClient(
1110+
self.account_url(storage_account_name, "blob"),
1111+
credential=storage_account_key,
1112+
max_single_get_size=1024,
1113+
max_chunk_get_size=1024,
1114+
require_encryption=True,
1115+
encryption_version='2.0',
1116+
key_encryption_key=kek)
1117+
1118+
blob = bsc.get_blob_client(self.container_name, self._get_blob_reference())
1119+
data = '你好世界' * 1024 # 12 KiB
1120+
blob.upload_blob(data, overwrite=True, encoding='utf-8')
1121+
1122+
# Act / Assert
1123+
stream = blob.download_blob(max_concurrency=2, encoding='utf-8')
1124+
assert stream.read() == data
1125+
1126+
result = ''
1127+
stream = blob.download_blob(encoding='utf-8')
1128+
for _ in range(4):
1129+
chunk = stream.read(chars=300)
1130+
result += chunk
1131+
assert len(chunk) == 300
1132+
1133+
result += stream.readall()
1134+
assert result == data
1135+
11011136
@pytest.mark.skip(reason="Intended for manual testing due to blob size.")
11021137
@pytest.mark.live_test_only
11031138
@BlobPreparer()

sdk/storage/azure-storage-blob/tests/test_blob_encryption_v2_async.py

Lines changed: 35 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1110,6 +1110,41 @@ async def test_get_blob_read_with_other_read_operations_ranged(self, **kwargs):
11101110
assert second == data[offset + read_size:offset + length]
11111111
assert read_length == len(second)
11121112

1113+
@pytest.mark.live_test_only
1114+
@BlobPreparer()
1115+
async def test_get_blob_using_read_chars(self, **kwargs):
1116+
storage_account_name = kwargs.pop("storage_account_name")
1117+
storage_account_key = kwargs.pop("storage_account_key")
1118+
1119+
await self._setup(storage_account_name, storage_account_key)
1120+
kek = KeyWrapper('key1')
1121+
bsc = BlobServiceClient(
1122+
self.account_url(storage_account_name, "blob"),
1123+
credential=storage_account_key,
1124+
max_single_get_size=1024,
1125+
max_chunk_get_size=1024,
1126+
require_encryption=True,
1127+
encryption_version='2.0',
1128+
key_encryption_key=kek)
1129+
1130+
blob = bsc.get_blob_client(self.container_name, self._get_blob_reference())
1131+
data = '你好世界' * 1024 # 12 KiB
1132+
await blob.upload_blob(data, overwrite=True, encoding='utf-8')
1133+
1134+
# Act / Assert
1135+
stream = await blob.download_blob(max_concurrency=2, encoding='utf-8')
1136+
assert await stream.read() == data
1137+
1138+
result = ''
1139+
stream = await blob.download_blob(encoding='utf-8')
1140+
for _ in range(4):
1141+
chunk = await stream.read(chars=300)
1142+
result += chunk
1143+
assert len(chunk) == 300
1144+
1145+
result += await stream.readall()
1146+
assert result == data
1147+
11131148
@pytest.mark.skip(reason="Intended for manual testing due to blob size.")
11141149
@pytest.mark.live_test_only
11151150
@BlobPreparer()

0 commit comments

Comments
 (0)