Skip to content

Commit 0ecc472

Browse files
authored
GH-46971: [C++][Parquet] Use temporary buffers when decrypting Parquet data pages (#46972)
### Rationale for this change

Reduce the memory usage required when reading wide, encrypted Parquet files.

### What changes are included in this PR?

Change `SerializedPageReader` so that it doesn't hold a decryption buffer but only allocates one as needed, so that the buffer can be freed after pages are decompressed.

### Are these changes tested?

This is only a performance improvement and doesn't change any behaviour, so it should be covered by existing tests. The memory improvement has been verified manually (see #46971).

### Are there any user-facing changes?

No.

* GitHub Issue: #46971

Authored-by: Adam Reeve <[email protected]>
Signed-off-by: Adam Reeve <[email protected]>
1 parent dadc21f commit 0ecc472

File tree

1 file changed

+7
-12
lines changed

1 file changed

+7
-12
lines changed

cpp/src/parquet/column_reader.cc

Lines changed: 7 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -226,8 +226,7 @@ class SerializedPageReader : public PageReader {
226226
decompression_buffer_(AllocateBuffer(properties_.memory_pool(), 0)),
227227
page_ordinal_(0),
228228
seen_num_values_(0),
229-
total_num_values_(total_num_values),
230-
decryption_buffer_(AllocateBuffer(properties_.memory_pool(), 0)) {
229+
total_num_values_(total_num_values) {
231230
if (crypto_ctx != nullptr) {
232231
crypto_ctx_ = *crypto_ctx;
233232
InitDecryption();
@@ -241,7 +240,7 @@ class SerializedPageReader : public PageReader {
241240
//
242241
// The returned Page contains references that aren't guaranteed to live
243242
// beyond the next call to NextPage(). SerializedPageReader reuses the
244-
// decryption and decompression buffers internally, so if NextPage() is
243+
// decompression buffer internally, so if NextPage() is
245244
// called then the content of previous page might be invalidated.
246245
std::shared_ptr<Page> NextPage() override;
247246

@@ -304,8 +303,6 @@ class SerializedPageReader : public PageReader {
304303
// updated by only the page ordinal.
305304
std::string data_page_aad_;
306305
std::string data_page_header_aad_;
307-
// Encryption
308-
std::shared_ptr<ResizableBuffer> decryption_buffer_;
309306
};
310307

311308
void SerializedPageReader::InitDecryption() {
@@ -477,14 +474,12 @@ std::shared_ptr<Page> SerializedPageReader::NextPage() {
477474

478475
// Decrypt it if we need to
479476
if (data_decryptor_ != nullptr) {
480-
PARQUET_THROW_NOT_OK(
481-
decryption_buffer_->Resize(data_decryptor_->PlaintextLength(compressed_len),
482-
/*shrink_to_fit=*/false));
483-
compressed_len =
484-
data_decryptor_->Decrypt(page_buffer->span_as<uint8_t>(),
485-
decryption_buffer_->mutable_span_as<uint8_t>());
477+
auto decryption_buffer = AllocateBuffer(
478+
properties_.memory_pool(), data_decryptor_->PlaintextLength(compressed_len));
479+
compressed_len = data_decryptor_->Decrypt(
480+
page_buffer->span_as<uint8_t>(), decryption_buffer->mutable_span_as<uint8_t>());
486481

487-
page_buffer = decryption_buffer_;
482+
page_buffer = decryption_buffer;
488483
}
489484

490485
if (page_type == PageType::DICTIONARY_PAGE) {

0 commit comments

Comments
 (0)