-
Notifications
You must be signed in to change notification settings - Fork 141
Expand file tree
/
Copy pathgroup_by_agg.h
More file actions
86 lines (75 loc) · 2.9 KB
/
group_by_agg.h
File metadata and controls
86 lines (75 loc) · 2.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
// Copyright 2025 Ant Group Co., Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "libspu/kernel/hal/group_by_agg.h"
namespace spu::kernel::hlo {
/// Algorithm used to carry out the group-by aggregation.
enum class GroupByAggMode {
  /// Approach built on a prefix-sum network.
  /// Ref: "Scape: Scalable Collaborative Analytics System on Private Database
  /// with Malicious Security", https://ieeexplore.ieee.org/document/9835540
  PrefixSumMode = 0,

  /// Approach that only uses sort + permute.
  /// Ref: "Secure Statistical Analysis on Multiple Datasets: Join and
  /// Group-By", https://eprint.iacr.org/2024/141
  DirectMode = 1,

  /// Pick the best mode automatically (heuristic).
  AutoMode = 2,
};
/// Row layout of the group-by aggregation output.
enum class OutputFormat {
  /// Output is sorted by keys.
  GroupedOrder = 0,

  /// Output is sorted by keys, but only one entry of each key is kept.
  OutputOrder = 1,
};
/// Tunable knobs for GroupByAgg. Every field has a default, so a
/// value-initialized instance (`GroupByAggOptions{}`) is a valid configuration.
struct GroupByAggOptions {
  /// When true, the valid bits also cover the bits of the payloads.
  bool valid_bits_include_payloads{false};

  /// There are two ways to extract the valid results:
  ///   1. use a valid-bit mask that marks the valid positions;
  ///   2. let the user locate the valid results on their own.
  ///
  /// When true, a flag vector is returned that indicates whether each output
  /// key is valid.
  bool return_valid_flag{false};

  /// When true, only the aggregated payloads are output. Useful when the
  /// caller is able to generate the keys locally.
  bool drop_keys{false};

  /// Algorithm selection; defaults to the heuristic choice.
  GroupByAggMode mode{GroupByAggMode::AutoMode};

  /// Requested layout of the output rows.
  OutputFormat output_format{OutputFormat::OutputOrder};

  /// Only meaningful when output_format == OutputFormat::OutputOrder.
  /// When true, only the unique keys are kept in the output. As a consequence
  /// THE UNIQUE COUNT OF KEYS must be revealed to all parties (hence
  /// "unsafe"); in exchange, this may lead to better performance in some
  /// cases.
  bool unsafe_output_order_drop_rest{false};
};
/// Aggregation function selector passed to GroupByAgg as `agg_func`.
enum class AggFunc {
  Sum = 0,
  Count = 1,
  Avg = 2,
  Max = 3,
  Min = 4,
  Percentile = 5,
};
// Group-by aggregation entry point: takes key columns and payload columns and
// applies a single aggregation function, configured through GroupByAggOptions.
//
// Current restrictions:
//   - Only 1-d keys and payloads are supported.
//   - To simplify the implementation, all keys/payloads must have the same
//     visibility.
//   - Only one agg_func per call; several agg_funcs may be supported later.
//
// Parameters:
//   ctx         - the SPUContext the computation runs with.
//   keys        - the group-by key columns.
//   payloads    - the columns to be aggregated.
//   agg_func    - the aggregation function to apply.
//   valid_bits  - NOTE(review): semantics are not documented in this header;
//                 presumably related to
//                 GroupByAggOptions::valid_bits_include_payloads - confirm
//                 against the implementation. It is taken as a mutable span
//                 (absl::Span<int64_t>), so a const container cannot be
//                 passed directly - TODO confirm whether it is written to.
//   options     - optional tuning knobs; defaults to a value-initialized
//                 GroupByAggOptions.
//
// Returns a vector of Value. NOTE(review): the exact contents/ordering of the
// returned values presumably depend on `options` (e.g. drop_keys,
// return_valid_flag, output_format) - confirm against the implementation.
//
// TODO(zjj): add unsigned hint after implementing unsigned optimization of sort
std::vector<Value> GroupByAgg(SPUContext* ctx,
absl::Span<spu::Value const> keys,
absl::Span<spu::Value const> payloads,
AggFunc agg_func, absl::Span<int64_t> valid_bits,
const GroupByAggOptions& options = {});
} // namespace spu::kernel::hlo