Skip to content
This repository was archived by the owner on Dec 8, 2021. It is now read-only.

Commit 68fd303

Browse files
authored
BREAKING CHANGE: Change Value::Bytes to google::cloud::spanner::Bytes (#920)
Introduce `google::cloud::spanner::Bytes` as the representation for the Spanner BYTES type: a sequence of raw bytes (in contrast to the Spanner STRING type, which is interpreted as Unicode characters). Like the previous `Value::Bytes`, a `google::cloud::spanner::Bytes` value can be converted to/from a base64-encoded US-ASCII `std::string`, although those operations are now internal-only. Unlike `Value::Bytes`, `google::cloud::spanner::Bytes` can be converted to/from any ordered sequence of bytes, not just `std::string`. That is, it can accept/produce data in any compatible representation (for example, as a `std::vector<std::uint8_t>`). Fixes #413.
1 parent 889ac24 commit 68fd303

File tree

10 files changed

+357
-249
lines changed

10 files changed

+357
-249
lines changed

google/cloud/spanner/CMakeLists.txt

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,8 @@ add_library(spanner_client
9797
${CMAKE_CURRENT_BINARY_DIR}/internal/build_info.cc
9898
backoff_policy.h
9999
batch_dml_result.h
100+
bytes.cc
101+
bytes.h
100102
client.cc
101103
client.h
102104
commit_result.h
@@ -120,8 +122,6 @@ add_library(spanner_client
120122
instance_admin_connection.h
121123
internal/api_client_header.cc
122124
internal/api_client_header.h
123-
internal/base64.cc
124-
internal/base64.h
125125
internal/build_info.h
126126
internal/compiler_info.cc
127127
internal/compiler_info.h
@@ -299,7 +299,6 @@ function (spanner_client_define_tests)
299299
instance_admin_connection_test.cc
300300
instance_test.cc
301301
internal/api_client_header_test.cc
302-
internal/base64_test.cc
303302
internal/build_info_test.cc
304303
internal/compiler_info_test.cc
305304
internal/connection_impl_test.cc
@@ -321,6 +320,7 @@ function (spanner_client_define_tests)
321320
internal/time_test.cc
322321
internal/transaction_impl_test.cc
323322
internal/tuple_utils_test.cc
323+
bytes_test.cc
324324
keys_test.cc
325325
mutations_test.cc
326326
read_options_test.cc

google/cloud/spanner/internal/base64.cc renamed to google/cloud/spanner/bytes.cc

Lines changed: 64 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -12,16 +12,16 @@
1212
// See the License for the specific language governing permissions and
1313
// limitations under the License.
1414

15-
#include "google/cloud/spanner/internal/base64.h"
15+
#include "google/cloud/spanner/bytes.h"
16+
#include "google/cloud/status.h"
1617
#include <array>
1718
#include <climits>
18-
#include <string>
1919

2020
namespace google {
2121
namespace cloud {
2222
namespace spanner {
2323
inline namespace SPANNER_CLIENT_NS {
24-
namespace internal {
24+
2525
namespace {
2626

2727
constexpr char kPadding = '=';
@@ -51,83 +51,105 @@ constexpr std::array<unsigned char, UCHAR_MAX + 1> kCharToIndexExcessOne = {{
5151
// UCHAR_MAX is required to be at least 255, meaning std::string::value_type
5252
// can always hold an octet. If UCHAR_MAX > 255, however, we have no way to
5353
// base64 encode large values. So, we demand exactly 255.
54-
static_assert(UCHAR_MAX == 255, "required by Base64Encode()");
54+
static_assert(UCHAR_MAX == 255, "required by base64 encoder");
5555

5656
} // namespace
5757

58-
std::string Base64Encode(std::string const& bytes) {
59-
std::string encoded;
60-
auto* p = reinterpret_cast<unsigned char const*>(bytes.data());
61-
auto* const ep = p + bytes.size();
62-
encoded.reserve((ep - p + 2) / 3 * 4); // 3 octets to 4 sextets
63-
while (ep - p >= 3) {
64-
unsigned int const v = p[0] << 16 | p[1] << 8 | p[2];
65-
encoded.push_back(kIndexToChar[v >> 18]);
66-
encoded.push_back(kIndexToChar[v >> 12 & 0x3f]);
67-
encoded.push_back(kIndexToChar[v >> 6 & 0x3f]);
68-
encoded.push_back(kIndexToChar[v & 0x3f]);
69-
p += 3;
70-
}
71-
switch (ep - p) {
58+
void Bytes::Encoder::Flush() {
59+
unsigned int const v = buf_[0] << 16 | buf_[1] << 8 | buf_[2];
60+
rep_.push_back(kIndexToChar[v >> 18]);
61+
rep_.push_back(kIndexToChar[v >> 12 & 0x3f]);
62+
rep_.push_back(kIndexToChar[v >> 6 & 0x3f]);
63+
rep_.push_back(kIndexToChar[v & 0x3f]);
64+
len_ = 0;
65+
}
66+
67+
void Bytes::Encoder::FlushAndPad() {
68+
switch (len_) {
7269
case 2: {
73-
unsigned int const v = p[0] << 16 | p[1] << 8;
74-
encoded.push_back(kIndexToChar[v >> 18]);
75-
encoded.push_back(kIndexToChar[v >> 12 & 0x3f]);
76-
encoded.push_back(kIndexToChar[v >> 6 & 0x3f]);
77-
encoded.push_back(kPadding);
70+
unsigned int const v = buf_[0] << 16 | buf_[1] << 8;
71+
rep_.push_back(kIndexToChar[v >> 18]);
72+
rep_.push_back(kIndexToChar[v >> 12 & 0x3f]);
73+
rep_.push_back(kIndexToChar[v >> 6 & 0x3f]);
74+
rep_.push_back(kPadding);
7875
break;
7976
}
8077
case 1: {
81-
unsigned int const v = p[0] << 16;
82-
encoded.push_back(kIndexToChar[v >> 18]);
83-
encoded.push_back(kIndexToChar[v >> 12 & 0x3f]);
84-
encoded.append(2, kPadding);
78+
unsigned int const v = buf_[0] << 16;
79+
rep_.push_back(kIndexToChar[v >> 18]);
80+
rep_.push_back(kIndexToChar[v >> 12 & 0x3f]);
81+
rep_.append(2, kPadding);
8582
break;
8683
}
8784
}
88-
return encoded;
8985
}
9086

91-
StatusOr<std::string> Base64Decode(std::string const& base64) {
92-
std::string decoded;
93-
auto* p = reinterpret_cast<unsigned char const*>(base64.data());
94-
auto* ep = p + base64.size();
95-
decoded.reserve((ep - p + 3) / 4 * 3); // 4 sextets to 3 octets
87+
void Bytes::Decoder::iterator::Fill() {
88+
if (pos_ != end_) {
89+
unsigned char p0 = *pos_++;
90+
unsigned char p1 = *pos_++;
91+
unsigned char p2 = *pos_++;
92+
unsigned char p3 = *pos_++;
93+
auto i0 = kCharToIndexExcessOne[p0] - 1;
94+
auto i1 = kCharToIndexExcessOne[p1] - 1;
95+
if (p3 == kPadding) {
96+
if (p2 == kPadding) {
97+
buf_[++len_] = i0 << 2 | i1 >> 4;
98+
} else {
99+
auto i2 = kCharToIndexExcessOne[p2] - 1;
100+
buf_[++len_] = i1 << 4 | i2 >> 2;
101+
buf_[++len_] = i0 << 2 | i1 >> 4;
102+
}
103+
} else {
104+
auto i2 = kCharToIndexExcessOne[p2] - 1;
105+
auto i3 = kCharToIndexExcessOne[p3] - 1;
106+
buf_[++len_] = i2 << 6 | i3;
107+
buf_[++len_] = i1 << 4 | i2 >> 2;
108+
buf_[++len_] = i0 << 2 | i1 >> 4;
109+
}
110+
}
111+
}
112+
113+
namespace internal {
114+
115+
// Construction from a base64-encoded US-ASCII `std::string`.
116+
StatusOr<Bytes> BytesFromBase64(std::string input) {
117+
auto* p = reinterpret_cast<unsigned char const*>(input.data());
118+
auto* ep = p + input.size();
96119
while (ep - p >= 4) {
97120
auto i0 = kCharToIndexExcessOne[p[0]];
98121
auto i1 = kCharToIndexExcessOne[p[1]];
99122
if (--i0 >= 64 || --i1 >= 64) break;
100123
if (p[3] == kPadding) {
101124
if (p[2] == kPadding) {
102125
if ((i1 & 0xf) != 0) break;
103-
decoded.push_back(i0 << 2 | i1 >> 4);
104126
} else {
105127
auto i2 = kCharToIndexExcessOne[p[2]];
106128
if (--i2 >= 64 || (i2 & 0x3) != 0) break;
107-
decoded.push_back(i0 << 2 | i1 >> 4);
108-
decoded.push_back(i1 << 4 | i2 >> 2);
109129
}
110130
p += 4;
111131
break;
112132
}
113133
auto i2 = kCharToIndexExcessOne[p[2]];
114134
auto i3 = kCharToIndexExcessOne[p[3]];
115135
if (--i2 >= 64 || --i3 >= 64) break;
116-
decoded.push_back(i0 << 2 | i1 >> 4);
117-
decoded.push_back(i1 << 4 | i2 >> 2);
118-
decoded.push_back(i2 << 6 | i3);
119136
p += 4;
120137
}
121138
if (p != ep) {
122-
auto const offset = reinterpret_cast<char const*>(p) - base64.data();
123-
auto const bad_chunk = base64.substr(offset, 4);
139+
auto const offset = reinterpret_cast<char const*>(p) - input.data();
140+
auto const bad_chunk = input.substr(offset, 4);
124141
auto message = "Invalid base64 chunk \"" + bad_chunk + "\"" +
125142
" at offset " + std::to_string(offset);
126143
return Status(StatusCode::kInvalidArgument, std::move(message));
127144
}
128-
return decoded;
145+
Bytes bytes;
146+
bytes.base64_rep_ = std::move(input);
147+
return bytes;
129148
}
130149

150+
// Conversion to a base64-encoded US-ASCII `std::string`.
151+
std::string BytesToBase64(Bytes const& b) { return b.base64_rep_; }
152+
131153
} // namespace internal
132154
} // namespace SPANNER_CLIENT_NS
133155
} // namespace spanner

google/cloud/spanner/bytes.h

Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
// Copyright 2019 Google LLC
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
#ifndef GOOGLE_CLOUD_CPP_SPANNER_GOOGLE_CLOUD_SPANNER_BYTES_H_
16+
#define GOOGLE_CLOUD_CPP_SPANNER_GOOGLE_CLOUD_SPANNER_BYTES_H_
17+
18+
#include "google/cloud/spanner/version.h"
19+
#include "google/cloud/status_or.h"
20+
#include <array>
21+
#include <cstddef>
22+
#include <iterator>
23+
#include <string>
24+
25+
namespace google {
26+
namespace cloud {
27+
namespace spanner {
28+
inline namespace SPANNER_CLIENT_NS {
29+
30+
class Bytes; // defined below
31+
32+
// Internal forward declarations to befriend.
33+
namespace internal {
34+
StatusOr<Bytes> BytesFromBase64(std::string input);
35+
std::string BytesToBase64(Bytes const& b);
36+
} // namespace internal
37+
38+
/**
39+
* A representation of the Spanner BYTES type: variable-length binary data.
40+
*
41+
* A `Bytes` value can be constructed from, and converted to, any sequence of
42+
* octets. `Bytes` values can be compared for equality.
43+
*/
44+
class Bytes {
45+
public:
46+
/// An empty sequence.
47+
Bytes() {}
48+
49+
/// Construction from a sequence of octets.
50+
///@{
51+
template <typename InputIt>
52+
Bytes(InputIt first, InputIt last) {
53+
Encoder encoder(base64_rep_);
54+
while (first != last) {
55+
encoder.buf_[encoder.len_++] = *first++;
56+
if (encoder.len_ == encoder.buf_.size()) encoder.Flush();
57+
}
58+
if (encoder.len_ != 0) encoder.FlushAndPad();
59+
}
60+
template <typename Container>
61+
explicit Bytes(Container const& c) : Bytes(std::begin(c), std::end(c)) {}
62+
///@}
63+
64+
/// Conversion to a sequence of octets. The `Container` must support
65+
/// construction from a range specified as a pair of input iterators.
66+
template <typename Container>
67+
Container get() const {
68+
Decoder decoder(base64_rep_);
69+
return Container(decoder.begin(), decoder.end());
70+
}
71+
72+
/// @name Relational operators
73+
///@{
74+
friend bool operator==(Bytes const& a, Bytes const& b) {
75+
return a.base64_rep_ == b.base64_rep_;
76+
}
77+
friend bool operator!=(Bytes const& a, Bytes const& b) { return !(a == b); }
78+
///@}
79+
80+
private:
81+
friend StatusOr<Bytes> internal::BytesFromBase64(std::string input);
82+
friend std::string internal::BytesToBase64(Bytes const& b);
83+
84+
struct Encoder {
85+
Encoder(std::string& rep) : rep_(rep), len_(0) {}
86+
void Flush();
87+
void FlushAndPad();
88+
89+
std::string& rep_; // encoded
90+
std::size_t len_; // buf_[0 .. len_-1] pending encode
91+
std::array<unsigned char, 3> buf_;
92+
};
93+
94+
struct Decoder {
95+
class iterator {
96+
public:
97+
using iterator_category = std::input_iterator_tag;
98+
using value_type = unsigned char;
99+
using difference_type = std::ptrdiff_t;
100+
using pointer = value_type*;
101+
using reference = value_type&;
102+
103+
iterator(std::string::const_iterator begin,
104+
std::string::const_iterator end)
105+
: pos_(begin), end_(end), len_(0) {
106+
Fill();
107+
}
108+
109+
void Fill();
110+
111+
reference operator*() { return buf_[len_]; }
112+
pointer operator->() { return &buf_[len_]; }
113+
114+
iterator& operator++() {
115+
if (--len_ == 0) Fill();
116+
return *this;
117+
}
118+
iterator operator++(int) {
119+
auto const old = *this;
120+
operator++();
121+
return old;
122+
}
123+
124+
friend bool operator==(iterator const& a, iterator const& b) {
125+
return a.pos_ == b.pos_ && a.len_ == b.len_;
126+
}
127+
friend bool operator!=(iterator const& a, iterator const& b) {
128+
return !(a == b);
129+
}
130+
131+
private:
132+
std::string::const_iterator pos_; // [pos_ .. end_) pending decode
133+
std::string::const_iterator end_;
134+
std::size_t len_; // buf_[len_ .. 1] decoded
135+
std::array<value_type, 1 + 3> buf_;
136+
};
137+
138+
Decoder(std::string const& rep) : rep_(rep) {}
139+
iterator begin() { return iterator(rep_.begin(), rep_.end()); }
140+
iterator end() { return iterator(rep_.end(), rep_.end()); }
141+
142+
std::string const& rep_; // encoded
143+
};
144+
145+
std::string base64_rep_; // valid base64 representation
146+
};
147+
148+
} // namespace SPANNER_CLIENT_NS
149+
} // namespace spanner
150+
} // namespace cloud
151+
} // namespace google
152+
153+
#endif // GOOGLE_CLOUD_CPP_SPANNER_GOOGLE_CLOUD_SPANNER_BYTES_H_

0 commit comments

Comments
 (0)