Skip to content

Commit 052e0aa

Browse files
authored
GH-41246: [C++][Python] Simplify nested field encryption configuration (#45462)
### Rationale for this change Columns can be encrypted with individual keys. For this, the column name has to be set in `EncryptionConfiguration::column_keys`. This poses the following challenges for columns with nested fields like `MapType`, `ListType`, and `StructType`. Encrypting a column of such type requires providing an encryption key for all nested (leaf) fields. Ideally, the column name should be sufficient (as it is for any other data type) to encrypt all nested fields. ### What changes are included in this PR? The column name can be used to encrypt all nested fields of `StructType`, `MapType`, and `ListType` columns with the same encryption key. The current column naming scheme can still be used for backward compatibility. ### Are these changes tested? Tested in C++ and Python. ### Are there any user-facing changes? Column encryption can be configured with less code and more intuitive naming. Documentation and examples updated. Fixes #41246. * GitHub Issue: #41246 Authored-by: Enrico Minack <[email protected]> Signed-off-by: Antoine Pitrou <[email protected]>
1 parent 8fcab2f commit 052e0aa

File tree

10 files changed

+666
-207
lines changed

10 files changed

+666
-207
lines changed

cpp/examples/arrow/parquet_column_encryption.cc

Lines changed: 19 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include "arrow/dataset/file_parquet.h"
2020
#include "arrow/dataset/parquet_encryption_config.h"
2121
#include "arrow/filesystem/localfs.h"
22+
#include "arrow/json/from_string.h"
2223
#include "arrow/util/secure_string.h"
2324
#include "parquet/encryption/crypto_factory.h"
2425
#include "parquet/encryption/test_in_memory_kms.h"
@@ -30,73 +31,28 @@ namespace fs = arrow::fs;
3031
namespace ds = arrow::dataset;
3132

3233
arrow::Result<std::shared_ptr<arrow::Table>> GetTable() {
33-
auto int_builder = arrow::Int32Builder();
34-
35-
std::shared_ptr<arrow::Array> arr_i;
36-
ARROW_RETURN_NOT_OK(int_builder.AppendValues({1, 3, 5, 7, 1}));
37-
ARROW_RETURN_NOT_OK(int_builder.Finish(&arr_i));
38-
34+
const auto& int_type = arrow::int32();
3935
auto struct_type = arrow::struct_({{"a", arrow::int32()}, {"b", arrow::int64()}});
40-
auto pool = arrow::default_memory_pool();
41-
auto a_builder = std::make_shared<arrow::Int32Builder>();
42-
auto b_builder = std::make_shared<arrow::Int64Builder>();
43-
auto struct_builder = arrow::StructBuilder(struct_type, pool, {a_builder, b_builder});
44-
45-
std::shared_ptr<arrow::Array> arr_struct;
46-
ARROW_RETURN_NOT_OK(struct_builder.Append());
47-
ARROW_RETURN_NOT_OK(a_builder->Append(2));
48-
ARROW_RETURN_NOT_OK(b_builder->Append(20));
49-
ARROW_RETURN_NOT_OK(struct_builder.Append());
50-
ARROW_RETURN_NOT_OK(a_builder->Append(4));
51-
ARROW_RETURN_NOT_OK(b_builder->Append(40));
52-
ARROW_RETURN_NOT_OK(struct_builder.Append());
53-
ARROW_RETURN_NOT_OK(a_builder->Append(6));
54-
ARROW_RETURN_NOT_OK(b_builder->Append(60));
55-
ARROW_RETURN_NOT_OK(struct_builder.Append());
56-
ARROW_RETURN_NOT_OK(a_builder->Append(8));
57-
ARROW_RETURN_NOT_OK(b_builder->Append(80));
58-
ARROW_RETURN_NOT_OK(struct_builder.Append());
59-
ARROW_RETURN_NOT_OK(a_builder->Append(10));
60-
ARROW_RETURN_NOT_OK(b_builder->Append(100));
61-
ARROW_RETURN_NOT_OK(struct_builder.Finish(&arr_struct));
62-
6336
auto map_type = arrow::map(arrow::int32(), arrow::utf8());
64-
auto key_builder = std::make_shared<arrow::Int32Builder>();
65-
auto item_builder = std::make_shared<arrow::StringBuilder>();
66-
auto map_builder = arrow::MapBuilder(pool, key_builder, item_builder, map_type);
67-
68-
std::shared_ptr<arrow::Array> arr_map;
69-
ARROW_RETURN_NOT_OK(map_builder.Append());
70-
ARROW_RETURN_NOT_OK(key_builder->AppendValues({2, 4}));
71-
ARROW_RETURN_NOT_OK(item_builder->AppendValues({"2", "4"}));
72-
ARROW_RETURN_NOT_OK(map_builder.Append());
73-
ARROW_RETURN_NOT_OK(key_builder->AppendValues({6}));
74-
ARROW_RETURN_NOT_OK(item_builder->AppendValues({"6"}));
75-
ARROW_RETURN_NOT_OK(map_builder.Append());
76-
ARROW_RETURN_NOT_OK(map_builder.Append());
77-
ARROW_RETURN_NOT_OK(key_builder->AppendValues({8, 10}));
78-
ARROW_RETURN_NOT_OK(item_builder->AppendValues({"8", "10"}));
79-
ARROW_RETURN_NOT_OK(map_builder.Append());
80-
ARROW_RETURN_NOT_OK(map_builder.Finish(&arr_map));
81-
8237
auto list_type = arrow::list(arrow::int32());
83-
auto value_builder = std::make_shared<arrow::Int32Builder>();
84-
auto list_builder = arrow::ListBuilder(pool, value_builder, list_type);
85-
86-
std::shared_ptr<arrow::Array> arr_list;
87-
ARROW_RETURN_NOT_OK(list_builder.Append());
88-
ARROW_RETURN_NOT_OK(value_builder->AppendValues({1, 2, 3}));
89-
ARROW_RETURN_NOT_OK(list_builder.Append());
90-
ARROW_RETURN_NOT_OK(value_builder->AppendValues({4, 5, 6}));
91-
ARROW_RETURN_NOT_OK(list_builder.Append());
92-
ARROW_RETURN_NOT_OK(value_builder->AppendValues({7}));
93-
ARROW_RETURN_NOT_OK(list_builder.Append());
94-
ARROW_RETURN_NOT_OK(value_builder->AppendValues({8}));
95-
ARROW_RETURN_NOT_OK(list_builder.Append());
96-
ARROW_RETURN_NOT_OK(list_builder.Finish(&arr_list));
38+
39+
ARROW_ASSIGN_OR_RAISE(auto arr_i,
40+
arrow::json::ArrayFromJSONString(int_type, "[1, 3, 5, 7, 1]"));
41+
ARROW_ASSIGN_OR_RAISE(
42+
auto arr_struct,
43+
arrow::json::ArrayFromJSONString(
44+
struct_type, "[[2, 20], [4, 40], [6, 60], [8, 80], [10, 100]]"));
45+
ARROW_ASSIGN_OR_RAISE(
46+
auto arr_map,
47+
arrow::json::ArrayFromJSONString(
48+
map_type,
49+
R"([[[2, "2"], [4, "4"]], [[6, "6"]], [], [[8, "8"], [10, "10"]], null])"));
50+
ARROW_ASSIGN_OR_RAISE(auto arr_list,
51+
arrow::json::ArrayFromJSONString(
52+
list_type, "[[1, 2, 3], [4, 5, 6], [7], [8], null]"));
9753

9854
auto schema = arrow::schema({
99-
arrow::field("i", arrow::int32()),
55+
arrow::field("i", int_type),
10056
arrow::field("s", struct_type),
10157
arrow::field("m", map_type),
10258
arrow::field("l", list_type),
@@ -131,8 +87,7 @@ arrow::Status WriteEncryptedFile(const std::string& path_to_file) {
13187
// Set write options with encryption configuration.
13288
auto encryption_config = std::make_shared<parquet::encryption::EncryptionConfiguration>(
13389
std::string("footerKeyId"));
134-
encryption_config->column_keys =
135-
"columnKeyId: i, s.a, s.b, m.key_value.key, m.key_value.value, l.list.element";
90+
encryption_config->column_keys = "columnKeyId: i, s, m, l";
13691

13792
auto parquet_encryption_config = std::make_shared<ds::ParquetEncryptionConfig>();
13893
// Directly assign shared_ptr objects to ParquetEncryptionConfig members.

0 commit comments

Comments
 (0)