Skip to content

Commit 85ea78d

Browse files
authored
GH-48337: [C++][Parquet] Improve column encryption API (#48338)
### Rationale for this change While working on PR #48336 I quickly got irritated by the clumsiness of the Parquet encryption configuration API. This issue attempts to reduce the verbosity of `ColumnEncryptionProperties` construction. ### What changes are included in this PR? 1. Remove unused `column_path` member in `ColumnEncryptionProperties` 2. Add convenience `ColumnEncryptionProperties` factory functions to avoid going through the clumsy `Builder` API ### Are these changes tested? Yes ### Are there any user-facing changes? Two deprecated constructors and a removed accessor for an useless property. **This PR includes breaking changes to public APIs.** (If there are any breaking changes to public APIs, please explain which changes are breaking. If not, you can remove this.) * GitHub Issue: #48337 Authored-by: Antoine Pitrou <[email protected]> Signed-off-by: Antoine Pitrou <[email protected]>
1 parent ba27b8f commit 85ea78d

File tree

7 files changed

+125
-158
lines changed

7 files changed

+125
-158
lines changed

cpp/examples/parquet/low_level_api/encryption_reader_writer.cc

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -68,13 +68,10 @@ int main(int argc, char** argv) {
6868
auto column_path1 = schema_desc.Column(5)->path()->ToDotString();
6969
auto column_path2 = schema_desc.Column(4)->path()->ToDotString();
7070

71-
parquet::ColumnEncryptionProperties::Builder encryption_col_builder0(column_path1);
72-
parquet::ColumnEncryptionProperties::Builder encryption_col_builder1(column_path2);
73-
encryption_col_builder0.key(kColumnEncryptionKey1)->key_id("kc1");
74-
encryption_col_builder1.key(kColumnEncryptionKey2)->key_id("kc2");
75-
76-
encryption_cols[column_path1] = encryption_col_builder0.build();
77-
encryption_cols[column_path2] = encryption_col_builder1.build();
71+
encryption_cols[column_path1] =
72+
parquet::ColumnEncryptionProperties::WithColumnKey(kColumnEncryptionKey1, "kc1");
73+
encryption_cols[column_path2] =
74+
parquet::ColumnEncryptionProperties::WithColumnKey(kColumnEncryptionKey2, "kc2");
7875

7976
parquet::FileEncryptionProperties::Builder file_encryption_builder(
8077
kFooterEncryptionKey);

cpp/examples/parquet/low_level_api/encryption_reader_writer_all_crypto_options.cc

Lines changed: 20 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -173,13 +173,10 @@ void InteropTestWriteEncryptedParquetFiles(std::string root_path) {
173173
encryption_cols2;
174174
std::string path1 = "double_field";
175175
std::string path2 = "float_field";
176-
parquet::ColumnEncryptionProperties::Builder encryption_col_builder_20(path1);
177-
parquet::ColumnEncryptionProperties::Builder encryption_col_builder_21(path2);
178-
encryption_col_builder_20.key(kColumnEncryptionKey1)->key_id("kc1");
179-
encryption_col_builder_21.key(kColumnEncryptionKey2)->key_id("kc2");
180-
181-
encryption_cols2[path1] = encryption_col_builder_20.build();
182-
encryption_cols2[path2] = encryption_col_builder_21.build();
176+
encryption_cols2[path1] =
177+
parquet::ColumnEncryptionProperties::WithColumnKey(kColumnEncryptionKey1, "kc1");
178+
encryption_cols2[path2] =
179+
parquet::ColumnEncryptionProperties::WithColumnKey(kColumnEncryptionKey1, "kc2");
183180

184181
parquet::FileEncryptionProperties::Builder file_encryption_builder_2(
185182
kFooterEncryptionKey);
@@ -194,13 +191,11 @@ void InteropTestWriteEncryptedParquetFiles(std::string root_path) {
194191
// (plaintext footer mode, readable by legacy readers)
195192
std::map<std::string, std::shared_ptr<parquet::ColumnEncryptionProperties>>
196193
encryption_cols3;
197-
parquet::ColumnEncryptionProperties::Builder encryption_col_builder_30(path1);
198-
parquet::ColumnEncryptionProperties::Builder encryption_col_builder_31(path2);
199-
encryption_col_builder_30.key(kColumnEncryptionKey1)->key_id("kc1");
200-
encryption_col_builder_31.key(kColumnEncryptionKey2)->key_id("kc2");
194+
encryption_cols3[path1] =
195+
parquet::ColumnEncryptionProperties::WithColumnKey(kColumnEncryptionKey1, "kc1");
196+
encryption_cols3[path2] =
197+
parquet::ColumnEncryptionProperties::WithColumnKey(kColumnEncryptionKey1, "kc2");
201198

202-
encryption_cols3[path1] = encryption_col_builder_30.build();
203-
encryption_cols3[path2] = encryption_col_builder_31.build();
204199
parquet::FileEncryptionProperties::Builder file_encryption_builder_3(
205200
kFooterEncryptionKey);
206201

@@ -214,13 +209,11 @@ void InteropTestWriteEncryptedParquetFiles(std::string root_path) {
214209
// Use aad_prefix.
215210
std::map<std::string, std::shared_ptr<parquet::ColumnEncryptionProperties>>
216211
encryption_cols4;
217-
parquet::ColumnEncryptionProperties::Builder encryption_col_builder_40(path1);
218-
parquet::ColumnEncryptionProperties::Builder encryption_col_builder_41(path2);
219-
encryption_col_builder_40.key(kColumnEncryptionKey1)->key_id("kc1");
220-
encryption_col_builder_41.key(kColumnEncryptionKey2)->key_id("kc2");
212+
encryption_cols4[path1] =
213+
parquet::ColumnEncryptionProperties::WithColumnKey(kColumnEncryptionKey1, "kc1");
214+
encryption_cols4[path2] =
215+
parquet::ColumnEncryptionProperties::WithColumnKey(kColumnEncryptionKey1, "kc2");
221216

222-
encryption_cols4[path1] = encryption_col_builder_40.build();
223-
encryption_cols4[path2] = encryption_col_builder_41.build();
224217
parquet::FileEncryptionProperties::Builder file_encryption_builder_4(
225218
kFooterEncryptionKey);
226219

@@ -234,13 +227,11 @@ void InteropTestWriteEncryptedParquetFiles(std::string root_path) {
234227
// Use aad_prefix and disable_aad_prefix_storage.
235228
std::map<std::string, std::shared_ptr<parquet::ColumnEncryptionProperties>>
236229
encryption_cols5;
237-
parquet::ColumnEncryptionProperties::Builder encryption_col_builder_50(path1);
238-
parquet::ColumnEncryptionProperties::Builder encryption_col_builder_51(path2);
239-
encryption_col_builder_50.key(kColumnEncryptionKey1)->key_id("kc1");
240-
encryption_col_builder_51.key(kColumnEncryptionKey2)->key_id("kc2");
230+
encryption_cols5[path1] =
231+
parquet::ColumnEncryptionProperties::WithColumnKey(kColumnEncryptionKey1, "kc1");
232+
encryption_cols5[path2] =
233+
parquet::ColumnEncryptionProperties::WithColumnKey(kColumnEncryptionKey1, "kc2");
241234

242-
encryption_cols5[path1] = encryption_col_builder_50.build();
243-
encryption_cols5[path2] = encryption_col_builder_51.build();
244235
parquet::FileEncryptionProperties::Builder file_encryption_builder_5(
245236
kFooterEncryptionKey);
246237

@@ -255,13 +246,11 @@ void InteropTestWriteEncryptedParquetFiles(std::string root_path) {
255246
// Use AES_GCM_CTR_V1 algorithm.
256247
std::map<std::string, std::shared_ptr<parquet::ColumnEncryptionProperties>>
257248
encryption_cols6;
258-
parquet::ColumnEncryptionProperties::Builder encryption_col_builder_60(path1);
259-
parquet::ColumnEncryptionProperties::Builder encryption_col_builder_61(path2);
260-
encryption_col_builder_60.key(kColumnEncryptionKey1)->key_id("kc1");
261-
encryption_col_builder_61.key(kColumnEncryptionKey2)->key_id("kc2");
249+
encryption_cols6[path1] =
250+
parquet::ColumnEncryptionProperties::WithColumnKey(kColumnEncryptionKey1, "kc1");
251+
encryption_cols6[path2] =
252+
parquet::ColumnEncryptionProperties::WithColumnKey(kColumnEncryptionKey1, "kc2");
262253

263-
encryption_cols6[path1] = encryption_col_builder_60.build();
264-
encryption_cols6[path2] = encryption_col_builder_61.build();
265254
parquet::FileEncryptionProperties::Builder file_encryption_builder_6(
266255
kFooterEncryptionKey);
267256

cpp/src/parquet/encryption/crypto_factory.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,7 @@ ColumnPathToEncryptionPropertiesMap CryptoFactory::GetColumnEncryptionProperties
156156
key_wrapper->GetEncryptionKeyMetadata(column_key, column_key_id, false);
157157

158158
std::shared_ptr<ColumnEncryptionProperties> cmd =
159-
ColumnEncryptionProperties::Builder(column_name)
159+
ColumnEncryptionProperties::Builder()
160160
.key(column_key)
161161
->key_metadata(column_key_key_metadata)
162162
->build();

cpp/src/parquet/encryption/encryption.cc

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -179,16 +179,12 @@ FileEncryptionProperties::Builder::disable_aad_prefix_storage() {
179179
return this;
180180
}
181181

182-
ColumnEncryptionProperties::ColumnEncryptionProperties(bool encrypted,
183-
std::string column_path,
184-
SecureString key,
182+
ColumnEncryptionProperties::ColumnEncryptionProperties(bool encrypted, SecureString key,
185183
std::string key_metadata)
186-
: column_path_(std::move(column_path)),
187-
encrypted_(encrypted),
184+
: encrypted_(encrypted),
188185
encrypted_with_footer_key_(encrypted && key.empty()),
189186
key_(std::move(key)),
190187
key_metadata_(std::move(key_metadata)) {
191-
DCHECK(!column_path_.empty());
192188
if (!encrypted) {
193189
DCHECK(key_.empty() && key_metadata_.empty());
194190
}
@@ -200,6 +196,22 @@ ColumnEncryptionProperties::ColumnEncryptionProperties(bool encrypted,
200196
}
201197
}
202198

199+
std::shared_ptr<ColumnEncryptionProperties> ColumnEncryptionProperties::Unencrypted() {
200+
return std::shared_ptr<ColumnEncryptionProperties>(
201+
new ColumnEncryptionProperties(/*encrypted=*/false, {}, {}));
202+
}
203+
204+
std::shared_ptr<ColumnEncryptionProperties> ColumnEncryptionProperties::WithFooterKey() {
205+
return std::shared_ptr<ColumnEncryptionProperties>(
206+
new ColumnEncryptionProperties(/*encrypted=*/true, {}, {}));
207+
}
208+
209+
std::shared_ptr<ColumnEncryptionProperties> ColumnEncryptionProperties::WithColumnKey(
210+
::arrow::util::SecureString key, std::string key_metadata) {
211+
return std::shared_ptr<ColumnEncryptionProperties>(new ColumnEncryptionProperties(
212+
/*encrypted=*/true, std::move(key), std::move(key_metadata)));
213+
}
214+
203215
ColumnDecryptionProperties::ColumnDecryptionProperties(std::string column_path,
204216
SecureString key)
205217
: column_path_(std::move(column_path)), key_(std::move(key)) {

cpp/src/parquet/encryption/encryption.h

Lines changed: 14 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -114,12 +114,13 @@ class PARQUET_EXPORT ColumnEncryptionProperties {
114114
public:
115115
class PARQUET_EXPORT Builder {
116116
public:
117-
/// Convenience builder for encrypted columns.
118-
explicit Builder(std::string name) : Builder(std::move(name), true) {}
117+
PARQUET_DEPRECATED("name argument is ignored, use default constructor instead")
118+
explicit Builder(const std::string& name) : encrypted_(true) {}
119119

120-
/// Convenience builder for encrypted columns.
121-
explicit Builder(const schema::ColumnPath& path)
122-
: Builder(path.ToDotString(), true) {}
120+
PARQUET_DEPRECATED("path argument is ignored, use default constructor instead")
121+
explicit Builder(const schema::ColumnPath& path) : encrypted_(true) {}
122+
123+
Builder() = default;
123124

124125
/// Set a column-specific key.
125126
/// If key is not set on an encrypted column, the column will
@@ -140,33 +141,31 @@ class PARQUET_EXPORT ColumnEncryptionProperties {
140141

141142
std::shared_ptr<ColumnEncryptionProperties> build() {
142143
return std::shared_ptr<ColumnEncryptionProperties>(
143-
new ColumnEncryptionProperties(encrypted_, column_path_, key_, key_metadata_));
144+
new ColumnEncryptionProperties(encrypted_, key_, key_metadata_));
144145
}
145146

146147
private:
147-
std::string column_path_;
148-
bool encrypted_;
148+
bool encrypted_ = true;
149149
::arrow::util::SecureString key_;
150150
std::string key_metadata_;
151-
152-
Builder(std::string path, bool encrypted)
153-
: column_path_(std::move(path)), encrypted_(encrypted) {}
154151
};
155152

156-
const std::string& column_path() const { return column_path_; }
157153
bool is_encrypted() const { return encrypted_; }
158154
bool is_encrypted_with_footer_key() const { return encrypted_with_footer_key_; }
159155
const ::arrow::util::SecureString& key() const { return key_; }
160156
const std::string& key_metadata() const { return key_metadata_; }
161157

158+
static std::shared_ptr<ColumnEncryptionProperties> Unencrypted();
159+
static std::shared_ptr<ColumnEncryptionProperties> WithFooterKey();
160+
static std::shared_ptr<ColumnEncryptionProperties> WithColumnKey(
161+
::arrow::util::SecureString key, std::string key_metadata = "");
162+
162163
private:
163-
std::string column_path_;
164164
bool encrypted_;
165165
bool encrypted_with_footer_key_;
166166
::arrow::util::SecureString key_;
167167
std::string key_metadata_;
168-
explicit ColumnEncryptionProperties(bool encrypted, std::string column_path,
169-
::arrow::util::SecureString key,
168+
explicit ColumnEncryptionProperties(bool encrypted, ::arrow::util::SecureString key,
170169
std::string key_metadata);
171170
};
172171

cpp/src/parquet/encryption/properties_test.cc

Lines changed: 31 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -25,29 +25,46 @@
2525
namespace parquet::encryption::test {
2626

2727
TEST(TestColumnEncryptionProperties, ColumnEncryptedWithOwnKey) {
28-
std::string column_path_1 = "column_1";
29-
ColumnEncryptionProperties::Builder column_builder_1(column_path_1);
28+
ColumnEncryptionProperties::Builder column_builder_1;
3029
column_builder_1.key(kColumnEncryptionKey1);
3130
column_builder_1.key_id("kc1");
3231
std::shared_ptr<ColumnEncryptionProperties> column_props_1 = column_builder_1.build();
3332

34-
ASSERT_EQ(column_path_1, column_props_1->column_path());
33+
ASSERT_EQ(true, column_props_1->is_encrypted());
34+
ASSERT_EQ(false, column_props_1->is_encrypted_with_footer_key());
35+
ASSERT_EQ(kColumnEncryptionKey1, column_props_1->key());
36+
ASSERT_EQ("kc1", column_props_1->key_metadata());
37+
38+
// Same with shorthand API
39+
column_props_1 =
40+
ColumnEncryptionProperties::WithColumnKey(kColumnEncryptionKey1, "kc1");
3541
ASSERT_EQ(true, column_props_1->is_encrypted());
3642
ASSERT_EQ(false, column_props_1->is_encrypted_with_footer_key());
3743
ASSERT_EQ(kColumnEncryptionKey1, column_props_1->key());
3844
ASSERT_EQ("kc1", column_props_1->key_metadata());
3945
}
4046

4147
TEST(TestColumnEncryptionProperties, ColumnEncryptedWithFooterKey) {
42-
std::string column_path_1 = "column_1";
43-
ColumnEncryptionProperties::Builder column_builder_1(column_path_1);
48+
ColumnEncryptionProperties::Builder column_builder_1;
4449
std::shared_ptr<ColumnEncryptionProperties> column_props_1 = column_builder_1.build();
4550

46-
ASSERT_EQ(column_path_1, column_props_1->column_path());
51+
ASSERT_EQ(true, column_props_1->is_encrypted());
52+
ASSERT_EQ(true, column_props_1->is_encrypted_with_footer_key());
53+
54+
// Same with shorthand API
55+
column_props_1 = ColumnEncryptionProperties::WithFooterKey();
4756
ASSERT_EQ(true, column_props_1->is_encrypted());
4857
ASSERT_EQ(true, column_props_1->is_encrypted_with_footer_key());
4958
}
5059

60+
TEST(TestColumnEncryptionProperties, UnencryptedColumn) {
61+
ColumnEncryptionProperties::Builder column_builder_1;
62+
63+
auto column_props_1 = ColumnEncryptionProperties::Unencrypted();
64+
ASSERT_EQ(false, column_props_1->is_encrypted());
65+
ASSERT_EQ(false, column_props_1->is_encrypted_with_footer_key());
66+
}
67+
5168
// Encrypt all columns and the footer with the same key.
5269
// (uniform encryption)
5370
TEST(TestEncryptionProperties, UniformEncryption) {
@@ -74,19 +91,14 @@ TEST(TestEncryptionProperties, UniformEncryption) {
7491
TEST(TestEncryptionProperties, EncryptFooterAndTwoColumns) {
7592
std::shared_ptr<parquet::schema::ColumnPath> column_path_1 =
7693
parquet::schema::ColumnPath::FromDotString("column_1");
77-
ColumnEncryptionProperties::Builder column_builder_1(column_path_1->ToDotString());
78-
column_builder_1.key(kColumnEncryptionKey1);
79-
column_builder_1.key_id("kc1");
80-
8194
std::shared_ptr<parquet::schema::ColumnPath> column_path_2 =
8295
parquet::schema::ColumnPath::FromDotString("column_2");
83-
ColumnEncryptionProperties::Builder column_builder_2(column_path_2->ToDotString());
84-
column_builder_2.key(kColumnEncryptionKey2);
85-
column_builder_2.key_id("kc2");
8696

8797
std::map<std::string, std::shared_ptr<ColumnEncryptionProperties>> encrypted_columns;
88-
encrypted_columns[column_path_1->ToDotString()] = column_builder_1.build();
89-
encrypted_columns[column_path_2->ToDotString()] = column_builder_2.build();
98+
encrypted_columns[column_path_1->ToDotString()] =
99+
ColumnEncryptionProperties::WithColumnKey(kColumnEncryptionKey1, "kc1");
100+
encrypted_columns[column_path_2->ToDotString()] =
101+
ColumnEncryptionProperties::WithColumnKey(kColumnEncryptionKey2, "kc2");
90102

91103
FileEncryptionProperties::Builder builder(kFooterEncryptionKey);
92104
builder.footer_key_metadata("kf");
@@ -100,7 +112,6 @@ TEST(TestEncryptionProperties, EncryptFooterAndTwoColumns) {
100112
std::shared_ptr<ColumnEncryptionProperties> out_col_props_1 =
101113
props->column_encryption_properties(column_path_1->ToDotString());
102114

103-
ASSERT_EQ(column_path_1->ToDotString(), out_col_props_1->column_path());
104115
ASSERT_EQ(true, out_col_props_1->is_encrypted());
105116
ASSERT_EQ(false, out_col_props_1->is_encrypted_with_footer_key());
106117
ASSERT_EQ(kColumnEncryptionKey1, out_col_props_1->key());
@@ -109,7 +120,6 @@ TEST(TestEncryptionProperties, EncryptFooterAndTwoColumns) {
109120
std::shared_ptr<ColumnEncryptionProperties> out_col_props_2 =
110121
props->column_encryption_properties(column_path_2->ToDotString());
111122

112-
ASSERT_EQ(column_path_2->ToDotString(), out_col_props_2->column_path());
113123
ASSERT_EQ(true, out_col_props_2->is_encrypted());
114124
ASSERT_EQ(false, out_col_props_2->is_encrypted_with_footer_key());
115125
ASSERT_EQ(kColumnEncryptionKey2, out_col_props_2->key());
@@ -128,19 +138,14 @@ TEST(TestEncryptionProperties, EncryptFooterAndTwoColumns) {
128138
TEST(TestEncryptionProperties, EncryptTwoColumnsNotFooter) {
129139
std::shared_ptr<parquet::schema::ColumnPath> column_path_1 =
130140
parquet::schema::ColumnPath::FromDotString("column_1");
131-
ColumnEncryptionProperties::Builder column_builder_1(*column_path_1);
132-
column_builder_1.key(kColumnEncryptionKey1);
133-
column_builder_1.key_id("kc1");
134-
135141
std::shared_ptr<parquet::schema::ColumnPath> column_path_2 =
136142
parquet::schema::ColumnPath::FromDotString("column_2");
137-
ColumnEncryptionProperties::Builder column_builder_2(*column_path_2);
138-
column_builder_2.key(kColumnEncryptionKey2);
139-
column_builder_2.key_id("kc2");
140143

141144
std::map<std::string, std::shared_ptr<ColumnEncryptionProperties>> encrypted_columns;
142-
encrypted_columns[column_path_1->ToDotString()] = column_builder_1.build();
143-
encrypted_columns[column_path_2->ToDotString()] = column_builder_2.build();
145+
encrypted_columns[column_path_1->ToDotString()] =
146+
ColumnEncryptionProperties::WithColumnKey(kColumnEncryptionKey1, "kc1");
147+
encrypted_columns[column_path_2->ToDotString()] =
148+
ColumnEncryptionProperties::WithColumnKey(kColumnEncryptionKey2, "kc2");
144149

145150
FileEncryptionProperties::Builder builder(kFooterEncryptionKey);
146151
builder.footer_key_metadata("kf");
@@ -155,7 +160,6 @@ TEST(TestEncryptionProperties, EncryptTwoColumnsNotFooter) {
155160
std::shared_ptr<ColumnEncryptionProperties> out_col_props_1 =
156161
props->column_encryption_properties(column_path_1->ToDotString());
157162

158-
ASSERT_EQ(column_path_1->ToDotString(), out_col_props_1->column_path());
159163
ASSERT_EQ(true, out_col_props_1->is_encrypted());
160164
ASSERT_EQ(false, out_col_props_1->is_encrypted_with_footer_key());
161165
ASSERT_EQ(kColumnEncryptionKey1, out_col_props_1->key());
@@ -164,7 +168,6 @@ TEST(TestEncryptionProperties, EncryptTwoColumnsNotFooter) {
164168
std::shared_ptr<ColumnEncryptionProperties> out_col_props_2 =
165169
props->column_encryption_properties(column_path_2->ToDotString());
166170

167-
ASSERT_EQ(column_path_2->ToDotString(), out_col_props_2->column_path());
168171
ASSERT_EQ(true, out_col_props_2->is_encrypted());
169172
ASSERT_EQ(false, out_col_props_2->is_encrypted_with_footer_key());
170173
ASSERT_EQ(kColumnEncryptionKey2, out_col_props_2->key());

0 commit comments

Comments
 (0)