Skip to content

Commit 8658400

Browse files
authored
feat(storage): new option to disable decompressive transcoding (#8834)
For objects stored in gzip format (and with contentEncoding == "gzip"), GCS over HTTP automatically decompresses the object during download. Some applications may want to read the object in its compressed format. Support this use case with a new option (`AcceptEncoding`) for `Client::ReadObject()`, and a new helper function (`AcceptEncodingGzip()`) that returns this option with the correct value.
1 parent 8e4b737 commit 8658400

File tree

6 files changed

+136
-4
lines changed

6 files changed

+136
-4
lines changed

google/cloud/storage/client.h

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1064,20 +1064,23 @@ class Client {
10641064
* Valid types for this operation include `DisableCrc32cChecksum`,
10651065
* `DisableMD5Hash`, `EncryptionKey`, `Generation`,
10661066
* `IfGenerationMatch`, `IfGenerationNotMatch`, `IfMetagenerationMatch`,
1067-
* `IfMetagenerationNotMatch`, `ReadFromOffset`, `ReadRange`, `ReadLast`
1068-
* and `UserProject`.
1067+
* `IfMetagenerationNotMatch`, `ReadFromOffset`, `ReadRange`, `ReadLast`,
1068+
* `UserProject`, and `AcceptEncoding`.
10691069
*
10701070
* @par Idempotency
10711071
* This is a read-only operation and is always idempotent.
10721072
*
10731073
* @par Example
10741074
* @snippet storage_object_samples.cc read object
10751075
*
1076-
* @par Example
1076+
* @par Example: read only a sub-range in the object.
10771077
* @snippet storage_object_samples.cc read object range
10781078
*
10791079
* @par Example: read an object encrypted with a CSEK.
10801080
* @snippet storage_object_csek_samples.cc read encrypted object
1081+
*
1082+
* @par Example: disable decompressive transcoding.
1083+
* @snippet storage_object_samples.cc read object gzip
10811084
*/
10821085
template <typename... Options>
10831086
ObjectReadStream ReadObject(std::string const& bucket_name,

google/cloud/storage/examples/storage_object_samples.cc

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -326,6 +326,22 @@ void ReadObjectIntoMemory(google::cloud::storage::Client client,
326326
(std::move(client), argv.at(0), argv.at(1));
327327
}
328328

329+
// Example: download an object while requesting `Accept-Encoding: gzip` (via
// `gcs::AcceptEncodingGzip()`), then print the number of bytes received.
// `argv` holds the bucket name at index 0 and the object name at index 1.
// Throws `std::runtime_error` if the download stream reports a non-OK status.
void ReadObjectGzip(google::cloud::storage::Client client,
330+
std::vector<std::string> const& argv) {
331+
//! [read object gzip]
332+
namespace gcs = ::google::cloud::storage;
333+
[](gcs::Client client, std::string const& bucket_name,
334+
std::string const& object_name) {
335+
auto is =
336+
client.ReadObject(bucket_name, object_name, gcs::AcceptEncodingGzip());
// The payload is binary: read it through the stream buffer. A formatted
// `std::istream_iterator<char>` would skip every whitespace-valued byte and
// report the wrong size.
337+
auto const contents = std::string{std::istreambuf_iterator<char>(is), {}};
338+
if (!is.status().ok()) throw std::runtime_error(is.status().message());
339+
std::cout << "The object has " << contents.size() << " characters\n";
340+
}
341+
//! [read object gzip]
342+
(std::move(client), argv.at(0), argv.at(1));
343+
}
344+
329345
void DeleteObject(google::cloud::storage::Client client,
330346
std::vector<std::string> const& argv) {
331347
//! [delete object] [START storage_delete_file]
@@ -691,6 +707,9 @@ void RunAll(std::vector<std::string> const& argv) {
691707
std::cout << "\nRunning ReadObjectRange() example" << std::endl;
692708
ReadObjectRange(client, {bucket_name, object_name, "1000", "2000"});
693709

710+
std::cout << "\nRunning ReadObjectGzip() example" << std::endl;
711+
ReadObjectGzip(client, {bucket_name, object_name});
712+
694713
std::cout << "\nRunning UpdateObjectMetadata() example" << std::endl;
695714
UpdateObjectMetadata(client,
696715
{bucket_name, object_name, "test-label", "test-value"});
@@ -793,6 +812,7 @@ int main(int argc, char* argv[]) {
793812
make_entry("read-object", {"<object-name>"}, ReadObject),
794813
make_entry("read-object-range", {"<object-name>", "<start>", "<end>"},
795814
ReadObjectRange),
815+
make_entry("read-object-gzip", {"<object-name>"}, ReadObjectGzip),
796816
make_entry("read-object-into-memory", {"<object-name>"},
797817
ReadObjectIntoMemory),
798818
make_entry("delete-object", {"<object-name>"}, DeleteObject),

google/cloud/storage/internal/curl_client.cc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1199,6 +1199,7 @@ StatusOr<std::unique_ptr<ObjectReadSource>> CurlClient::ReadObjectXml(
11991199
// None of the IfGeneration*Match nor IfMetageneration*Match can be set. This
12001200
// is checked by the caller (in this class).
12011201
builder.AddOption(request.GetOption<UserProject>());
1202+
builder.AddOption(request.GetOption<AcceptEncoding>());
12021203

12031204
//
12041205
// Apply the options from GenericRequestBase<> that are set, translating

google/cloud/storage/internal/object_requests.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -169,7 +169,7 @@ class ReadObjectRangeRequest
169169
ReadObjectRangeRequest, DisableCrc32cChecksum, DisableMD5Hash,
170170
EncryptionKey, Generation, IfGenerationMatch, IfGenerationNotMatch,
171171
IfMetagenerationMatch, IfMetagenerationNotMatch, ReadFromOffset,
172-
ReadRange, ReadLast, UserProject> {
172+
ReadRange, ReadLast, UserProject, AcceptEncoding> {
173173
public:
174174
using GenericObjectRequest::GenericObjectRequest;
175175

google/cloud/storage/tests/decompressive_transcoding_integration_test.cc

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,81 @@ TEST_F(DecompressiveTranscodingIntegrationTest, WriteAndReadXml) {
127127
ASSERT_NE(decompressed.substr(0, 32), contents.substr(0, 32));
128128
}
129129

130+
TEST_F(DecompressiveTranscodingIntegrationTest, WriteAndReadCompressedJson) {
// Uploads a gzip file with contentEncoding "gzip", reads it back with
// `AcceptEncodingGzip()`, and expects the bytes returned to start with the
// same bytes that were uploaded (i.e. no decompression took place).
131+
// TODO(storage-testbench#321) - fix transcoding support in the emulator
132+
if (UsingEmulator()) GTEST_SKIP();
133+
// The gzip payload comes from the file named by this environment variable;
// the test fails if the variable is unset or empty.
134+
auto const gzip_filename = google::cloud::internal::GetEnv(
135+
"GOOGLE_CLOUD_CPP_STORAGE_TEST_GZIP_FILENAME")
136+
.value_or("");
137+
ASSERT_FALSE(gzip_filename.empty());
138+
std::ifstream gz(gzip_filename, std::ios::binary);
139+
auto const contents = std::string{std::istreambuf_iterator<char>(gz), {}};
// NOTE(review): presumably guards against a file that could not be opened
// -- confirm.
140+
ASSERT_TRUE(gz.good());
141+
// Client configured with a 3 second `TransferStallTimeoutOption` and a
// `LimitedErrorCountRetryPolicy(5)`.
142+
auto client = Client(
143+
Options{}
144+
.set<TransferStallTimeoutOption>(std::chrono::seconds(3))
145+
.set<RetryPolicyOption>(LimitedErrorCountRetryPolicy(5).clone()));
146+
// Upload the gzip bytes as-is, with contentEncoding "gzip" and contentType
// "text/plain", then verify the returned metadata reflects both.
147+
auto object_name = MakeRandomObjectName();
148+
auto insert = client.InsertObject(
149+
bucket_name(), object_name, contents, IfGenerationMatch(0),
150+
WithObjectMetadata(
151+
ObjectMetadata().set_content_encoding("gzip").set_content_type(
152+
"text/plain")));
153+
ASSERT_STATUS_OK(insert);
154+
ScheduleForDelete(*insert);
155+
EXPECT_EQ(insert->content_encoding(), "gzip");
156+
EXPECT_EQ(insert->content_type(), "text/plain");
157+
// Read the object back with `AcceptEncodingGzip()`.
// NOTE(review): `IfGenerationNotMatch(0)` is presumably what routes this
// download through the JSON API (per the test name) -- confirm.
158+
auto reader =
159+
client.ReadObject(bucket_name(), object_name, AcceptEncodingGzip(),
160+
IfGenerationNotMatch(0));
161+
ASSERT_STATUS_OK(reader.status());
162+
auto compressed = std::string{std::istreambuf_iterator<char>(reader), {}};
// Check the status again: it is re-read after the whole stream is consumed.
163+
ASSERT_STATUS_OK(reader.status());
164+
// Only the first 32 bytes of the download and the upload are compared.
165+
ASSERT_EQ(compressed.substr(0, 32), contents.substr(0, 32));
166+
}
167+
168+
TEST_F(DecompressiveTranscodingIntegrationTest, WriteAndReadCompressedXml) {
// Uploads a gzip file with contentEncoding "gzip", reads it back with
// `AcceptEncodingGzip()`, and expects the bytes returned to start with the
// same bytes that were uploaded (i.e. no decompression took place).
169+
// TODO(storage-testbench#321) - fix transcoding support in the emulator
170+
if (UsingEmulator()) GTEST_SKIP();
171+
// The gzip payload comes from the file named by this environment variable;
// the test fails if the variable is unset or empty.
172+
auto const gzip_filename = google::cloud::internal::GetEnv(
173+
"GOOGLE_CLOUD_CPP_STORAGE_TEST_GZIP_FILENAME")
174+
.value_or("");
175+
ASSERT_FALSE(gzip_filename.empty());
176+
std::ifstream gz(gzip_filename, std::ios::binary);
177+
auto const contents = std::string{std::istreambuf_iterator<char>(gz), {}};
// NOTE(review): presumably guards against a file that could not be opened
// -- confirm.
178+
ASSERT_TRUE(gz.good());
179+
// Client configured with a 3 second `TransferStallTimeoutOption` and a
// `LimitedErrorCountRetryPolicy(5)`.
180+
auto client = Client(
181+
Options{}
182+
.set<TransferStallTimeoutOption>(std::chrono::seconds(3))
183+
.set<RetryPolicyOption>(LimitedErrorCountRetryPolicy(5).clone()));
184+
// Upload the gzip bytes as-is, with contentEncoding "gzip" and contentType
// "text/plain", then verify the returned metadata reflects both.
185+
auto object_name = MakeRandomObjectName();
186+
auto insert = client.InsertObject(
187+
bucket_name(), object_name, contents, IfGenerationMatch(0),
188+
WithObjectMetadata(
189+
ObjectMetadata().set_content_encoding("gzip").set_content_type(
190+
"text/plain")));
191+
ASSERT_STATUS_OK(insert);
192+
ScheduleForDelete(*insert);
193+
EXPECT_EQ(insert->content_encoding(), "gzip");
194+
EXPECT_EQ(insert->content_type(), "text/plain");
195+
// Read the object back with only `AcceptEncodingGzip()` set.
// NOTE(review): with no generation/metageneration preconditions the client
// presumably uses the XML download path (per the test name) -- confirm.
196+
auto reader =
197+
client.ReadObject(bucket_name(), object_name, AcceptEncodingGzip());
198+
ASSERT_STATUS_OK(reader.status());
199+
auto compressed = std::string{std::istreambuf_iterator<char>(reader), {}};
// Check the status again: it is re-read after the whole stream is consumed.
200+
ASSERT_STATUS_OK(reader.status());
201+
// Only the first 32 bytes of the download and the upload are compared.
202+
ASSERT_EQ(compressed.substr(0, 32), contents.substr(0, 32));
203+
}
204+
130205
} // anonymous namespace
131206
GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_END
132207
} // namespace storage

google/cloud/storage/well_known_headers.h

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -277,6 +277,39 @@ EncryptionKeyData CreateKeyFromGenerator(Generator& gen) {
277277
return EncryptionDataFromBinaryKey(key);
278278
}
279279

280+
/**
281+
* Modify the accepted encodings.
282+
*
283+
* When using HTTP, GCS decompresses gzip-encoded objects by default:
284+
*
285+
* https://cloud.google.com/storage/docs/transcoding
286+
*
287+
* Setting this option to `gzip` disables automatic decompression. This can be
288+
* useful for applications wanting to operate with the compressed data. Setting
289+
* this option to `identity`, or not setting this option, returns decompressed
290+
* data.
291+
*
292+
* @note Decompressive transcoding only applies to objects that are
293+
* compressed with `gzip` and have their `content_encoding()` attribute set
294+
* accordingly. At the time of this writing GCS does not decompress objects
295+
* stored with other compression algorithms, nor does it detect the object
296+
* compression based on the object name or its contents.
297+
*
298+
* @see `AcceptEncodingGzip()` is a helper function to disable decompressive
299+
* transcoding.
300+
*/
301+
struct AcceptEncoding
302+
: public internal::WellKnownHeader<AcceptEncoding, std::string> {
// Inherit the constructors of the `WellKnownHeader` base.
303+
using WellKnownHeader<AcceptEncoding, std::string>::WellKnownHeader;
// The header name associated with this option: "Accept-Encoding".
304+
static char const* header_name() { return "Accept-Encoding"; }
305+
};
306+
307+
/// Returns an `AcceptEncoding` option holding the value "gzip".
inline AcceptEncoding AcceptEncodingGzip() { return AcceptEncoding("gzip"); }
308+
309+
/// Returns an `AcceptEncoding` option holding the value "identity".
inline AcceptEncoding AcceptEncodingIdentity() {
310+
return AcceptEncoding("identity");
311+
}
312+
280313
GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_END
281314
} // namespace storage
282315
} // namespace cloud

0 commit comments

Comments
 (0)