Skip to content

Commit 139390c

Browse files
authored
feat: compute chunk wise checksum for bidi_writes (#1675)
feat: compute chunk-wise checksum for bidi_writes and send it via BidiWriteObjectRequest. As a part of this change, also did a small refactoring: * Moved the precondition check to the `_utils.py` file
1 parent d6b8f55 commit 139390c

File tree

5 files changed

+74
-18
lines changed

5 files changed

+74
-18
lines changed
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import google_crc32c
16+
17+
from google.api_core import exceptions
18+
19+
def raise_if_no_fast_crc32c():
    """Check that the C-accelerated version of google-crc32c is available.

    If it is not, raise an error to prevent silent performance degradation.

    :raises google.api_core.exceptions.FailedPrecondition: If
        ``google_crc32c.implementation`` is anything other than ``"c"``.
    :rtype: None
    :returns: Nothing. The function returns normally when the C extension
        is in use and raises otherwise.
    """
    if google_crc32c.implementation != "c":
        # Adjacent string literals are concatenated verbatim, so each
        # sentence needs its own trailing space to keep the message readable.
        raise exceptions.FailedPrecondition(
            "The google-crc32c package is not installed with C support. "
            "C extension is required for faster data integrity checks. "
            "For more information, see https://github.com/googleapis/python-crc32c."
        )

google/cloud/storage/_experimental/asyncio/async_appendable_object_writer.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,10 @@
2222
2323
"""
2424
from typing import Optional, Union
25+
26+
from google_crc32c import Checksum
27+
28+
from ._utils import raise_if_no_fast_crc32c
2529
from google.cloud import _storage_v2
2630
from google.cloud.storage._experimental.asyncio.async_grpc_client import (
2731
AsyncGrpcClient,
@@ -100,6 +104,7 @@ def __init__(
100104
:param write_handle: (Optional) An existing handle for writing the object.
101105
If provided, opening the bidi-gRPC connection will be faster.
102106
"""
107+
raise_if_no_fast_crc32c()
103108
self.client = client
104109
self.bucket_name = bucket_name
105110
self.object_name = object_name
@@ -191,11 +196,13 @@ async def append(self, data: bytes) -> None:
191196
bytes_to_flush = 0
192197
while start_idx < total_bytes:
193198
end_idx = min(start_idx + _MAX_CHUNK_SIZE_BYTES, total_bytes)
199+
data_chunk = data[start_idx:end_idx]
194200
await self.write_obj_stream.send(
195201
_storage_v2.BidiWriteObjectRequest(
196202
write_offset=self.offset,
197203
checksummed_data=_storage_v2.ChecksummedData(
198-
content=data[start_idx:end_idx]
204+
content=data_chunk,
205+
crc32c=int.from_bytes(Checksum(data_chunk).digest(), "big"),
199206
),
200207
)
201208
)

google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,11 @@
1414

1515
from __future__ import annotations
1616
import asyncio
17-
import google_crc32c
18-
from google.api_core import exceptions
19-
from google_crc32c import Checksum
20-
2117
from typing import List, Optional, Tuple
2218

19+
from google_crc32c import Checksum
20+
21+
from ._utils import raise_if_no_fast_crc32c
2322
from google.cloud.storage._experimental.asyncio.async_read_object_stream import (
2423
_AsyncReadObjectStream,
2524
)
@@ -160,14 +159,7 @@ def __init__(
160159
:param read_handle: (Optional) An existing read handle.
161160
"""
162161

163-
# Verify that the fast, C-accelerated version of crc32c is available.
164-
# If not, raise an error to prevent silent performance degradation.
165-
if google_crc32c.implementation != "c":
166-
raise exceptions.NotFound(
167-
"The google-crc32c package is not installed with C support. "
168-
"Bidi reads require the C extension for data integrity checks."
169-
"For more information, see https://github.com/googleapis/python-crc32c."
170-
)
162+
raise_if_no_fast_crc32c()
171163

172164
self.client = client
173165
self.bucket_name = bucket_name

tests/unit/asyncio/test_async_appendable_object_writer.py

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,9 @@
1515
import pytest
1616
from unittest import mock
1717

18+
from google_crc32c import Checksum
19+
20+
from google.api_core import exceptions
1821
from google.cloud.storage._experimental.asyncio.async_appendable_object_writer import (
1922
AsyncAppendableObjectWriter,
2023
)
@@ -85,6 +88,23 @@ def test_init_with_optional_args(mock_write_object_stream, mock_client):
8588
)
8689

8790

91+
@mock.patch("google.cloud.storage._experimental.asyncio._utils.google_crc32c")
@mock.patch(
    "google.cloud.storage._experimental.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client"
)
def test_init_raises_if_crc32c_c_extension_is_missing(
    mock_grpc_client, mock_google_crc32c
):
    """Constructing the writer fails when google_crc32c is not the C build.

    The ``google_crc32c`` name inside ``_utils`` is patched so the test can
    control the ``implementation`` value seen during construction.
    """
    # Decorators are applied bottom-up, so the grpc_client mock is the first
    # argument and the google_crc32c mock is the second.
    mock_google_crc32c.implementation = "python"

    with pytest.raises(exceptions.FailedPrecondition) as exc_info:
        AsyncAppendableObjectWriter(mock_grpc_client, "bucket", "object")

    # Only the stable prefix of the message is checked.
    assert "The google-crc32c package is not installed with C support" in str(
        exc_info.value
    )
106+
107+
88108
@pytest.mark.asyncio
89109
@mock.patch(
90110
"google.cloud.storage._experimental.asyncio.async_appendable_object_writer._AsyncWriteObjectStream"
@@ -434,10 +454,15 @@ async def test_append_sends_data_in_chunks(mock_write_object_stream, mock_client
434454
# First chunk
435455
assert first_call[0][0].write_offset == 100
436456
assert len(first_call[0][0].checksummed_data.content) == _MAX_CHUNK_SIZE_BYTES
437-
457+
assert first_call[0][0].checksummed_data.crc32c == int.from_bytes(
458+
Checksum(data[:_MAX_CHUNK_SIZE_BYTES]).digest(), byteorder="big"
459+
)
438460
# Second chunk
439461
assert second_call[0][0].write_offset == 100 + _MAX_CHUNK_SIZE_BYTES
440462
assert len(second_call[0][0].checksummed_data.content) == 1
463+
assert second_call[0][0].checksummed_data.crc32c == int.from_bytes(
464+
Checksum(data[_MAX_CHUNK_SIZE_BYTES:]).digest(), byteorder="big"
465+
)
441466

442467
assert writer.offset == 100 + len(data)
443468
writer.simple_flush.assert_not_awaited()

tests/unit/asyncio/test_async_multi_range_downloader.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -349,9 +349,7 @@ async def test_downloading_without_opening_should_throw_error(
349349
assert str(exc.value) == "Underlying bidi-gRPC stream is not open"
350350
assert not mrd.is_stream_open
351351

352-
@mock.patch(
353-
"google.cloud.storage._experimental.asyncio.async_multi_range_downloader.google_crc32c"
354-
)
352+
@mock.patch("google.cloud.storage._experimental.asyncio._utils.google_crc32c")
355353
@mock.patch(
356354
"google.cloud.storage._experimental.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client"
357355
)
@@ -360,7 +358,7 @@ def test_init_raises_if_crc32c_c_extension_is_missing(
360358
):
361359
mock_google_crc32c.implementation = "python"
362360

363-
with pytest.raises(exceptions.NotFound) as exc_info:
361+
with pytest.raises(exceptions.FailedPrecondition) as exc_info:
364362
AsyncMultiRangeDownloader(mock_grpc_client, "bucket", "object")
365363

366364
assert "The google-crc32c package is not installed with C support" in str(

0 commit comments

Comments
 (0)