Skip to content

Commit 58fe02f

Browse files
authored
Refactor crypto integration with arrow (#169)
Arrow >= 20.0.0 refactored its crypto API to better support multithreading; align with the new API while remaining backward compatible with earlier Arrow versions.
1 parent 4835396 commit 58fe02f

File tree

7 files changed

+573
-197
lines changed

7 files changed

+573
-197
lines changed

include/encryption_internal.h

Lines changed: 5 additions & 95 deletions
Original file line numberDiff line numberDiff line change
@@ -17,98 +17,8 @@
1717

1818
#pragma once
1919

20-
#include <memory>
21-
#include <string>
22-
#include <vector>
23-
24-
#include "parquet/properties.h"
25-
#include "parquet/types.h"
26-
27-
using parquet::ParquetCipher;
28-
29-
namespace parquet {
30-
namespace encryption {
31-
32-
constexpr int kGcmTagLength = 16;
33-
constexpr int kNonceLength = 12;
34-
35-
// Module types
36-
constexpr int8_t kFooter = 0;
37-
constexpr int8_t kColumnMetaData = 1;
38-
constexpr int8_t kDataPage = 2;
39-
constexpr int8_t kDictionaryPage = 3;
40-
constexpr int8_t kDataPageHeader = 4;
41-
constexpr int8_t kDictionaryPageHeader = 5;
42-
constexpr int8_t kColumnIndex = 6;
43-
constexpr int8_t kOffsetIndex = 7;
44-
45-
/// Performs AES encryption operations with GCM or CTR ciphers.
46-
class AesEncryptor {
47-
public:
48-
static AesEncryptor* Make(ParquetCipher::type alg_id, int key_len, bool metadata,
49-
std::vector<AesEncryptor*>* all_encryptors);
50-
51-
~AesEncryptor();
52-
53-
/// Size difference between plaintext and ciphertext, for this cipher.
54-
int CiphertextSizeDelta();
55-
56-
/// Encrypts plaintext with the key and aad. Key length is passed only for validation.
57-
/// If different from value in constructor, exception will be thrown.
58-
int Encrypt(const uint8_t* plaintext, int plaintext_len, const uint8_t* key,
59-
int key_len, const uint8_t* aad, int aad_len, uint8_t* ciphertext);
60-
61-
/// Encrypts plaintext footer, in order to compute footer signature (tag).
62-
int SignedFooterEncrypt(const uint8_t* footer, int footer_len, const uint8_t* key,
63-
int key_len, const uint8_t* aad, int aad_len,
64-
const uint8_t* nonce, uint8_t* encrypted_footer);
65-
66-
void WipeOut();
67-
68-
private:
69-
/// Can serve one key length only. Possible values: 16, 24, 32 bytes.
70-
explicit AesEncryptor(ParquetCipher::type alg_id, int key_len, bool metadata);
71-
// PIMPL Idiom
72-
class AesEncryptorImpl;
73-
std::unique_ptr<AesEncryptorImpl> impl_;
74-
};
75-
76-
/// Performs AES decryption operations with GCM or CTR ciphers.
77-
class AesDecryptor {
78-
public:
79-
static AesDecryptor* Make(ParquetCipher::type alg_id, int key_len, bool metadata,
80-
std::vector<AesDecryptor*>* all_decryptors);
81-
82-
~AesDecryptor();
83-
void WipeOut();
84-
85-
/// Size difference between plaintext and ciphertext, for this cipher.
86-
int CiphertextSizeDelta();
87-
88-
/// Decrypts ciphertext with the key and aad. Key length is passed only for
89-
/// validation. If different from value in constructor, exception will be thrown.
90-
int Decrypt(const uint8_t* ciphertext, int ciphertext_len, const uint8_t* key,
91-
int key_len, const uint8_t* aad, int aad_len, uint8_t* plaintext);
92-
93-
private:
94-
/// Can serve one key length only. Possible values: 16, 24, 32 bytes.
95-
explicit AesDecryptor(ParquetCipher::type alg_id, int key_len, bool metadata);
96-
// PIMPL Idiom
97-
class AesDecryptorImpl;
98-
std::unique_ptr<AesDecryptorImpl> impl_;
99-
};
100-
101-
std::string CreateModuleAad(const std::string& file_aad, int8_t module_type,
102-
int16_t row_group_ordinal, int16_t column_ordinal,
103-
int16_t page_ordinal);
104-
105-
std::string CreateFooterAad(const std::string& aad_prefix_bytes);
106-
107-
// Update last two bytes of page (or page header) module AAD
108-
void QuickUpdatePageAad(const std::string& AAD, int16_t new_page_ordinal);
109-
110-
// Wraps OpenSSL RAND_bytes function
111-
void RandBytes(unsigned char* buf, int num);
112-
113-
} // namespace encryption
114-
} // namespace parquet
20+
// Select the declarations matching the Arrow release being built against.
//
// ARROW_VERSION_MAJOR is defined by arrow/util/config.h. Include it here so this
// header is self-contained: an undefined macro evaluates to 0 inside #if, which
// would silently select the pre-20 declarations regardless of the installed
// Arrow version if the includer had not already pulled in an Arrow header.
#include "arrow/util/config.h"

#if ARROW_VERSION_MAJOR < 20
21+
#include "encryption_internal_19.h"
22+
#else
23+
#include "encryption_internal_20.h"
24+
#endif

include/encryption_internal_19.h

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
#pragma once
19+
20+
#include <memory>
21+
#include <string>
22+
#include <vector>
23+
24+
#include "parquet/properties.h"
25+
#include "parquet/types.h"
26+
27+
using parquet::ParquetCipher;
28+
29+
namespace parquet {
30+
namespace encryption {
31+
32+
constexpr int kGcmTagLength = 16;
33+
constexpr int kNonceLength = 12;
34+
35+
// Module types
36+
constexpr int8_t kFooter = 0;
37+
constexpr int8_t kColumnMetaData = 1;
38+
constexpr int8_t kDataPage = 2;
39+
constexpr int8_t kDictionaryPage = 3;
40+
constexpr int8_t kDataPageHeader = 4;
41+
constexpr int8_t kDictionaryPageHeader = 5;
42+
constexpr int8_t kColumnIndex = 6;
43+
constexpr int8_t kOffsetIndex = 7;
44+
45+
/// Performs AES encryption operations with GCM or CTR ciphers.
46+
class AesEncryptor {
47+
public:
48+
static AesEncryptor* Make(ParquetCipher::type alg_id, int key_len, bool metadata,
49+
std::vector<AesEncryptor*>* all_encryptors);
50+
51+
~AesEncryptor();
52+
53+
/// Size difference between plaintext and ciphertext, for this cipher.
54+
int CiphertextSizeDelta();
55+
56+
/// Encrypts plaintext with the key and aad. Key length is passed only for validation.
57+
/// If different from value in constructor, exception will be thrown.
58+
int Encrypt(const uint8_t* plaintext, int plaintext_len, const uint8_t* key,
59+
int key_len, const uint8_t* aad, int aad_len, uint8_t* ciphertext);
60+
61+
/// Encrypts plaintext footer, in order to compute footer signature (tag).
62+
int SignedFooterEncrypt(const uint8_t* footer, int footer_len, const uint8_t* key,
63+
int key_len, const uint8_t* aad, int aad_len,
64+
const uint8_t* nonce, uint8_t* encrypted_footer);
65+
66+
void WipeOut();
67+
68+
private:
69+
/// Can serve one key length only. Possible values: 16, 24, 32 bytes.
70+
explicit AesEncryptor(ParquetCipher::type alg_id, int key_len, bool metadata);
71+
// PIMPL Idiom
72+
class AesEncryptorImpl;
73+
std::unique_ptr<AesEncryptorImpl> impl_;
74+
};
75+
76+
/// Performs AES decryption operations with GCM or CTR ciphers.
77+
class AesDecryptor {
78+
public:
79+
static AesDecryptor* Make(ParquetCipher::type alg_id, int key_len, bool metadata,
80+
std::vector<AesDecryptor*>* all_decryptors);
81+
82+
~AesDecryptor();
83+
void WipeOut();
84+
85+
/// Size difference between plaintext and ciphertext, for this cipher.
86+
int CiphertextSizeDelta();
87+
88+
/// Decrypts ciphertext with the key and aad. Key length is passed only for
89+
/// validation. If different from value in constructor, exception will be thrown.
90+
int Decrypt(const uint8_t* ciphertext, int ciphertext_len, const uint8_t* key,
91+
int key_len, const uint8_t* aad, int aad_len, uint8_t* plaintext);
92+
93+
private:
94+
/// Can serve one key length only. Possible values: 16, 24, 32 bytes.
95+
explicit AesDecryptor(ParquetCipher::type alg_id, int key_len, bool metadata);
96+
// PIMPL Idiom
97+
class AesDecryptorImpl;
98+
std::unique_ptr<AesDecryptorImpl> impl_;
99+
};
100+
101+
std::string CreateModuleAad(const std::string& file_aad, int8_t module_type,
102+
int16_t row_group_ordinal, int16_t column_ordinal,
103+
int16_t page_ordinal);
104+
105+
std::string CreateFooterAad(const std::string& aad_prefix_bytes);
106+
107+
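// Update last two bytes of page (or page header) module AAD.
// NOTE(review): AAD is taken by const reference although it is described as being
// updated in place -- confirm how the implementation writes to it.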
108+
void QuickUpdatePageAad(const std::string& AAD, int16_t new_page_ordinal);
109+
110+
// Wraps OpenSSL RAND_bytes function
111+
void RandBytes(unsigned char* buf, int num);
112+
113+
} // namespace encryption
114+
} // namespace parquet

include/encryption_internal_20.h

Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
#pragma once
19+
20+
#include <memory>
21+
#include <string>
22+
#include <vector>
23+
24+
#include "arrow/util/span.h"
25+
#include "parquet/properties.h"
26+
#include "parquet/types.h"
27+
28+
using parquet::ParquetCipher;
29+
30+
namespace parquet::encryption {
31+
32+
constexpr int32_t kGcmTagLength = 16;
33+
constexpr int32_t kNonceLength = 12;
34+
35+
// Module types
36+
constexpr int8_t kFooter = 0;
37+
constexpr int8_t kColumnMetaData = 1;
38+
constexpr int8_t kDataPage = 2;
39+
constexpr int8_t kDictionaryPage = 3;
40+
constexpr int8_t kDataPageHeader = 4;
41+
constexpr int8_t kDictionaryPageHeader = 5;
42+
constexpr int8_t kColumnIndex = 6;
43+
constexpr int8_t kOffsetIndex = 7;
44+
constexpr int8_t kBloomFilterHeader = 8;
45+
constexpr int8_t kBloomFilterBitset = 9;
46+
47+
/// Performs AES encryption operations with GCM or CTR ciphers.
48+
class PARQUET_EXPORT AesEncryptor {
49+
public:
50+
/// Can serve one key length only. Possible values: 16, 24, 32 bytes.
51+
/// If write_length is true, the ciphertext length is prepended to the ciphertext.
52+
explicit AesEncryptor(ParquetCipher::type alg_id, int32_t key_len, bool metadata,
53+
bool write_length = true);
54+
55+
static std::unique_ptr<AesEncryptor> Make(ParquetCipher::type alg_id, int32_t key_len,
56+
bool metadata, bool write_length = true);
57+
58+
~AesEncryptor();
59+
60+
/// The size of the ciphertext, for this cipher and the specified plaintext length.
61+
[[nodiscard]] int32_t CiphertextLength(int64_t plaintext_len) const;
62+
63+
/// Encrypts plaintext with the key and aad. The size of the key span is used only for
64+
/// validation: if it differs from the constructor's key_len, an exception will be thrown.
65+
int32_t Encrypt(::arrow::util::span<const uint8_t> plaintext,
66+
::arrow::util::span<const uint8_t> key,
67+
::arrow::util::span<const uint8_t> aad,
68+
::arrow::util::span<uint8_t> ciphertext);
69+
70+
/// Encrypts plaintext footer, in order to compute footer signature (tag).
71+
int32_t SignedFooterEncrypt(::arrow::util::span<const uint8_t> footer,
72+
::arrow::util::span<const uint8_t> key,
73+
::arrow::util::span<const uint8_t> aad,
74+
::arrow::util::span<const uint8_t> nonce,
75+
::arrow::util::span<uint8_t> encrypted_footer);
76+
77+
private:
78+
// PIMPL Idiom
79+
class AesEncryptorImpl;
80+
std::unique_ptr<AesEncryptorImpl> impl_;
81+
};
82+
83+
/// Performs AES decryption operations with GCM or CTR ciphers.
84+
class PARQUET_EXPORT AesDecryptor {
85+
public:
86+
/// \brief Construct an AesDecryptor
87+
///
88+
/// \param alg_id the encryption algorithm to use
89+
/// \param key_len key length. Possible values: 16, 24, 32 bytes.
90+
/// \param metadata if true then this is a metadata decryptor
91+
/// \param contains_length if true, expect ciphertext length prepended to the ciphertext
92+
explicit AesDecryptor(ParquetCipher::type alg_id, int32_t key_len, bool metadata,
93+
bool contains_length = true);
94+
95+
static std::unique_ptr<AesDecryptor> Make(ParquetCipher::type alg_id, int32_t key_len,
96+
bool metadata);
97+
98+
~AesDecryptor();
99+
100+
/// The size of the plaintext, for this cipher and the specified ciphertext length.
101+
[[nodiscard]] int32_t PlaintextLength(int32_t ciphertext_len) const;
102+
103+
/// The size of the ciphertext, for this cipher and the specified plaintext length.
104+
[[nodiscard]] int32_t CiphertextLength(int32_t plaintext_len) const;
105+
106+
/// Decrypts ciphertext with the key and aad. The size of the key span is used only for
107+
/// validation: if it differs from the constructor's key_len, an exception will be thrown.
108+
/// The caller is responsible for ensuring that the plaintext buffer is at least as
109+
/// large as PlaintextLength(ciphertext_len).
110+
int32_t Decrypt(::arrow::util::span<const uint8_t> ciphertext,
111+
::arrow::util::span<const uint8_t> key,
112+
::arrow::util::span<const uint8_t> aad,
113+
::arrow::util::span<uint8_t> plaintext);
114+
115+
private:
116+
// PIMPL Idiom
117+
class AesDecryptorImpl;
118+
std::unique_ptr<AesDecryptorImpl> impl_;
119+
};
120+
121+
std::string CreateModuleAad(const std::string& file_aad, int8_t module_type,
122+
int16_t row_group_ordinal, int16_t column_ordinal,
123+
int32_t page_ordinal);
124+
125+
std::string CreateFooterAad(const std::string& aad_prefix_bytes);
126+
127+
// Update last two bytes of page (or page header) module AAD
128+
void QuickUpdatePageAad(int32_t new_page_ordinal, std::string* AAD);
129+
130+
// Wraps OpenSSL RAND_bytes function
131+
void RandBytes(unsigned char* buf, size_t num);
132+
133+
// Ensure OpenSSL is initialized.
134+
//
135+
// This is only necessary in specific situations since OpenSSL otherwise
136+
// initializes itself automatically. For example, under Valgrind, a memory
137+
// leak will be reported if OpenSSL is initialized for the first time from
138+
// a worker thread; calling this function from the main thread prevents this.
139+
void EnsureBackendInitialized();
140+
141+
} // namespace parquet::encryption

0 commit comments

Comments
 (0)