Skip to content

Commit 5c6a72b

Browse files
committed
feat(bigtable): add validation for checksum in stream processing
1 parent 0f0a7cf commit 5c6a72b

13 files changed

+496
-13
lines changed

google/cloud/bigtable/BUILD.bazel

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ cc_library(
5959
"@com_google_googleapis//google/bigtable/v2:bigtable_cc_grpc",
6060
"@com_google_googleapis//google/longrunning:longrunning_cc_grpc",
6161
"@com_google_googleapis//google/rpc:error_details_cc_proto",
62+
"@com_github_google_crc32c//:crc32c",
6263
"@com_github_grpc_grpc//:grpc++",
6364
] + select({
6465
":metrics_enabled": [

google/cloud/bigtable/CMakeLists.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ include(CTest)
4848
include(CreateBazelConfig)
4949

5050
find_package(opentelemetry-cpp CONFIG)
51+
find_package(Crc32c)
5152

5253
# the client library
5354
add_library(
@@ -179,8 +180,12 @@ add_library(
179180
internal/common_client.h
180181
internal/connection_refresh_state.cc
181182
internal/connection_refresh_state.h
183+
internal/const_buffer.cc
184+
internal/const_buffer.h
182185
internal/convert_policies.cc
183186
internal/convert_policies.h
187+
internal/crc32c.cc
188+
internal/crc32c.h
184189
internal/data_connection_impl.cc
185190
internal/data_connection_impl.h
186191
internal/data_tracing_connection.cc
@@ -291,6 +296,7 @@ target_link_libraries(
291296
google-cloud-cpp::bigtable_protos
292297
google-cloud-cpp::common
293298
google-cloud-cpp::grpc_utils
299+
Crc32c::crc32c
294300
gRPC::grpc++
295301
gRPC::grpc
296302
protobuf::libprotobuf)
@@ -477,6 +483,7 @@ if (BUILD_TESTING)
477483
internal/bulk_mutator_test.cc
478484
internal/connection_refresh_state_test.cc
479485
internal/convert_policies_test.cc
486+
internal/crc32c_test.cc
480487
internal/data_connection_impl_test.cc
481488
internal/data_tracing_connection_test.cc
482489
internal/default_row_reader_test.cc

google/cloud/bigtable/bigtable_client_unit_tests.bzl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ bigtable_client_unit_tests = [
5151
"internal/bulk_mutator_test.cc",
5252
"internal/connection_refresh_state_test.cc",
5353
"internal/convert_policies_test.cc",
54+
"internal/crc32c_test.cc",
5455
"internal/data_connection_impl_test.cc",
5556
"internal/data_tracing_connection_test.cc",
5657
"internal/default_row_reader_test.cc",

google/cloud/bigtable/google_cloud_cpp_bigtable.bzl

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,9 @@ google_cloud_cpp_bigtable_hdrs = [
8888
"internal/client_options_defaults.h",
8989
"internal/common_client.h",
9090
"internal/connection_refresh_state.h",
91+
"internal/const_buffer.h",
9192
"internal/convert_policies.h",
93+
"internal/crc32c.h",
9294
"internal/data_connection_impl.h",
9395
"internal/data_tracing_connection.h",
9496
"internal/default_row_reader.h",
@@ -208,7 +210,9 @@ google_cloud_cpp_bigtable_srcs = [
208210
"internal/bigtable_tracing_stub.cc",
209211
"internal/bulk_mutator.cc",
210212
"internal/connection_refresh_state.cc",
213+
"internal/const_buffer.cc",
211214
"internal/convert_policies.cc",
215+
"internal/crc32c.cc",
212216
"internal/data_connection_impl.cc",
213217
"internal/data_tracing_connection.cc",
214218
"internal/default_row_reader.cc",
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
// Copyright 2023 Google LLC
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// https://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
#include "google/cloud/bigtable/internal/const_buffer.h"
16+
17+
namespace google {
18+
namespace cloud {
19+
namespace bigtable_internal {
20+
GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_BEGIN
21+
22+
// Drops the first `count` bytes from `s`. Buffers that are consumed entirely
// are removed from the sequence; a partially consumed buffer is replaced by a
// view of its remaining tail. If `count` covers every buffer, `s` is cleared.
void PopFrontBytes(ConstBufferSequence& s, std::size_t count) {
  // Skip every leading buffer that fits completely within `count`.
  auto first_kept = s.begin();
  while (first_kept != s.end() && first_kept->size() <= count) {
    count -= first_kept->size();
    ++first_kept;
  }
  if (first_kept == s.end()) {
    s.clear();
    return;
  }
  // In practice this is expected to be cheap, most vectors will contain 1
  // or 2 elements. And, if you are really lucky, your compiler turns this
  // into a memmove():
  //     https://godbolt.org/z/jw5VDd
  s.erase(s.begin(), first_kept);
  if (count == 0 || s.empty()) return;
  // The new front buffer is only partially consumed: keep its tail.
  auto& head = s.front();
  head = ConstBuffer(head.data() + count, head.size() - count);
}
40+
41+
GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_END
42+
} // namespace bigtable_internal
43+
} // namespace cloud
44+
} // namespace google
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
// Copyright 2023 Google LLC
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// https://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
#ifndef GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_INTERNAL_CONST_BUFFER_H
16+
#define GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_INTERNAL_CONST_BUFFER_H
17+
18+
#include "google/cloud/bigtable/version.h"
19+
#include "absl/types/span.h"
20+
#include <numeric>
21+
#include <vector>
22+
23+
namespace google {
24+
namespace cloud {
25+
namespace bigtable_internal {
26+
GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_BEGIN
27+
28+
/// Represents a memory range. Use to upload with low copying.
29+
using ConstBuffer = absl::Span<char const>;
30+
31+
/// Represent a sequence of memory ranges. Use to upload with low copying.
32+
using ConstBufferSequence = std::vector<ConstBuffer>;
33+
34+
/// The total number of bytes in the buffer sequence.
35+
inline std::size_t TotalBytes(ConstBufferSequence const& s) {
36+
return std::accumulate(
37+
s.begin(), s.end(), std::size_t{0},
38+
[](std::size_t a, ConstBuffer const& b) { return a + b.size(); });
39+
}
40+
41+
/// Removes the first @p count bytes from @p s.
42+
void PopFrontBytes(ConstBufferSequence& s, std::size_t count);
43+
44+
GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_END
45+
} // namespace bigtable_internal
46+
} // namespace cloud
47+
} // namespace google
48+
49+
#endif // GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_INTERNAL_CONST_BUFFER_H
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
// Copyright 2023 Google LLC
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// https://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
#include "google/cloud/bigtable/internal/crc32c.h"
16+
#include "absl/base/config.h"
17+
#if defined(ABSL_LTS_RELEASE_VERSION) && ABSL_LTS_RELEASE_VERSION >= 20230125
18+
#include "absl/crc/crc32c.h"
19+
#define GOOGLE_CLOUD_CPP_USE_ABSL_CRC32C 1
20+
#else
21+
#define GOOGLE_CLOUD_CPP_USE_ABSL_CRC32C 0
22+
#endif // ABSL_LTS_RELEASE_VERSION
23+
#include <crc32c/crc32c.h>
24+
25+
namespace google {
26+
namespace cloud {
27+
namespace bigtable_internal {
28+
GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_BEGIN
29+
30+
std::uint32_t ExtendCrc32c(std::uint32_t crc, absl::string_view data) {
31+
return crc32c::Extend(crc, reinterpret_cast<uint8_t const*>(data.data()),
32+
data.size());
33+
}
34+
35+
std::uint32_t ExtendCrc32c(std::uint32_t crc,
36+
bigtable_internal::ConstBufferSequence const& data) {
37+
for (auto const& b : data) {
38+
crc = ExtendCrc32c(crc, absl::string_view{b.data(), b.size()});
39+
}
40+
return crc;
41+
}
42+
43+
std::uint32_t ExtendCrc32c(std::uint32_t crc, absl::Cord const& data) {
44+
for (auto i = data.chunk_begin(); i != data.chunk_end(); ++i) {
45+
crc = ExtendCrc32c(crc, *i);
46+
}
47+
return crc;
48+
}
49+
50+
#if GOOGLE_CLOUD_CPP_USE_ABSL_CRC32C
51+
52+
// Combines `crc` with `data_crc` using absl::ConcatCrc32c(). Only the size of
// `data` is read; its bytes are not scanned. `data_crc` is taken to be the
// CRC32C of `data` (not verified here).
std::uint32_t ExtendCrc32c(std::uint32_t crc, absl::string_view data,
                           std::uint32_t data_crc) {
  auto const combined = absl::ConcatCrc32c(
      absl::crc32c_t{crc}, absl::crc32c_t{data_crc}, data.size());
  return static_cast<std::uint32_t>(combined);
}
57+
58+
std::uint32_t ExtendCrc32c(std::uint32_t crc,
59+
bigtable_internal::ConstBufferSequence const& data,
60+
std::uint32_t data_crc) {
61+
auto const size = bigtable_internal::TotalBytes(data);
62+
return static_cast<std::uint32_t>(
63+
absl::ConcatCrc32c(absl::crc32c_t{crc}, absl::crc32c_t{data_crc}, size));
64+
}
65+
66+
// Combines `crc` with `data_crc` using absl::ConcatCrc32c(). Only the size of
// `data` is read; its chunks are not scanned. `data_crc` is taken to be the
// CRC32C of `data` (not verified here).
std::uint32_t ExtendCrc32c(std::uint32_t crc, absl::Cord const& data,
                           std::uint32_t data_crc) {
  auto const combined = absl::ConcatCrc32c(
      absl::crc32c_t{crc}, absl::crc32c_t{data_crc}, data.size());
  return static_cast<std::uint32_t>(combined);
}
71+
72+
#else
73+
74+
std::uint32_t ExtendCrc32c(std::uint32_t crc, absl::string_view data,
75+
std::uint32_t /*data_crc*/) {
76+
return ExtendCrc32c(crc, data);
77+
}
78+
79+
std::uint32_t ExtendCrc32c(std::uint32_t crc,
80+
internal::ConstBufferSequence const& data,
81+
std::uint32_t /*data_crc*/) {
82+
return ExtendCrc32c(crc, data);
83+
}
84+
85+
std::uint32_t ExtendCrc32c(std::uint32_t crc, absl::Cord const& data,
86+
std::uint32_t /*data_crc*/) {
87+
return ExtendCrc32c(crc, data);
88+
}
89+
90+
#endif // GOOGLE_CLOUD_CPP_USE_ABSL_CRC32C
91+
92+
GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_END
93+
} // namespace bigtable_internal
94+
} // namespace cloud
95+
} // namespace google
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
// Copyright 2023 Google LLC
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// https://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
#ifndef GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_INTERNAL_CRC32C_H
16+
#define GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_INTERNAL_CRC32C_H
17+
18+
#include "google/cloud/bigtable/internal/const_buffer.h"
19+
#include "google/cloud/bigtable/version.h"
20+
#include "absl/strings/cord.h"
21+
#include "absl/strings/string_view.h"
22+
#include <cstdint>
23+
24+
namespace google {
25+
namespace cloud {
26+
namespace bigtable_internal {
27+
GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_BEGIN
28+
29+
/// Extends the running CRC32C checksum @p crc with the bytes in @p data and
/// returns the updated checksum.
std::uint32_t ExtendCrc32c(std::uint32_t crc, absl::string_view data);
30+
std::uint32_t ExtendCrc32c(std::uint32_t crc,
31+
bigtable_internal::ConstBufferSequence const& data);
32+
/// Extends the running CRC32C checksum @p crc with the chunks of @p data, in
/// order, and returns the updated checksum.
std::uint32_t ExtendCrc32c(std::uint32_t crc, absl::Cord const& data);
33+
34+
std::uint32_t ExtendCrc32c(std::uint32_t crc, absl::string_view data,
35+
std::uint32_t data_crc);
36+
std::uint32_t ExtendCrc32c(std::uint32_t crc,
37+
bigtable_internal::ConstBufferSequence const& data,
38+
std::uint32_t data_crc);
39+
std::uint32_t ExtendCrc32c(std::uint32_t crc, absl::Cord const& data,
40+
std::uint32_t data_crc);
41+
42+
/// Returns the CRC32C checksum of @p data, starting from an initial value of 0.
inline std::uint32_t Crc32c(absl::string_view data) {
  return ExtendCrc32c(std::uint32_t{0}, data);
}
45+
46+
/// Returns the CRC32C checksum of the buffers in @p data, taken in sequence
/// order and starting from an initial value of 0.
inline std::uint32_t Crc32c(
    bigtable_internal::ConstBufferSequence const& data) {
  return ExtendCrc32c(std::uint32_t{0}, data);
}
50+
51+
/// Returns the CRC32C checksum of the contents of @p data, starting from an
/// initial value of 0.
inline std::uint32_t Crc32c(absl::Cord const& data) {
  return ExtendCrc32c(std::uint32_t{0}, data);
}
54+
55+
GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_END
56+
} // namespace bigtable_internal
57+
} // namespace cloud
58+
} // namespace google
59+
60+
#endif // GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_INTERNAL_CRC32C_H

0 commit comments

Comments
 (0)