Skip to content

Commit 0b668fc

Browse files
authored
Flat container builders (#507)
This PR introduces new structures to be used in cosmetic_filters: a FlatSerialize trait for things that can be serialized to a flatbuffer, and a Builder structure (with unit tests) that helps construct a serializable representation. It also migrates the network filter to the new API and increases MIN_ALIGNMENT to 8 to support using u64 as a key.
1 parent ec3a345 commit 0b668fc

File tree

13 files changed

+1289
-114
lines changed

13 files changed

+1289
-114
lines changed

src/blocker.rs

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -440,11 +440,9 @@ impl Blocker {
440440
network_filters: Vec<crate::filters::network::NetworkFilter>,
441441
options: &BlockerOptions,
442442
) -> Self {
443-
use crate::filters::fb_builder::FlatBufferBuilder;
444-
use crate::filters::fb_network::FilterDataContext;
443+
use crate::filters::{fb_builder::make_flatbuffer, fb_network::FilterDataContext};
445444

446-
let memory =
447-
FlatBufferBuilder::make_flatbuffer(network_filters, options.enable_optimizations);
445+
let memory = make_flatbuffer(network_filters, options.enable_optimizations);
448446
let filter_data_context = FilterDataContext::new(memory);
449447
Self::from_context(filter_data_context)
450448
}

src/engine.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
33
use crate::blocker::{Blocker, BlockerResult};
44
use crate::cosmetic_filter_cache::{CosmeticFilterCache, UrlSpecificResources};
5-
use crate::filters::fb_builder::FlatBufferBuilder;
5+
use crate::filters::fb_builder::make_flatbuffer;
66
use crate::filters::fb_network::{FilterDataContext, FilterDataContextRef};
77
use crate::lists::{FilterSet, ParseOptions};
88
use crate::regex_manager::RegexManagerDiscardPolicy;
@@ -103,7 +103,7 @@ impl Engine {
103103
..
104104
} = set;
105105

106-
let memory = FlatBufferBuilder::make_flatbuffer(network_filters, optimize);
106+
let memory = make_flatbuffer(network_filters, optimize);
107107

108108
let filter_data_context = FilterDataContext::new(memory);
109109

src/filters/fb_builder.rs

Lines changed: 115 additions & 99 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@ use std::vec;
99
use flatbuffers::WIPOffset;
1010

1111
use crate::filters::network::{NetworkFilter, NetworkFilterMaskHelper};
12+
use crate::flatbuffers::containers::flat_multimap::FlatMultiMapBuilder;
13+
use crate::flatbuffers::containers::flat_serialize::{FlatBuilder, FlatSerialize, WIPFlatVec};
1214
use crate::flatbuffers::unsafe_tools::VerifiedFlatbufferMemory;
1315
use crate::network_filter_list::token_histogram;
1416
use crate::optimizer;
@@ -29,29 +31,20 @@ pub(crate) enum NetworkFilterListId {
2931
}
3032

3133
#[derive(Default, Clone)]
32-
struct FilterListBuilder {
34+
struct NetworkFilterListBuilder {
3335
filters: Vec<NetworkFilter>,
36+
optimize: bool,
3437
}
3538

36-
pub(crate) struct FlatBufferBuilder {
37-
lists: Vec<FilterListBuilder>,
38-
39+
#[derive(Default)]
40+
struct EngineFlatBuilder<'a> {
41+
fb_builder: flatbuffers::FlatBufferBuilder<'a>,
3942
unique_domains_hashes: Vec<Hash>,
4043
unique_domains_hashes_map: HashMap<Hash, u32>,
41-
index: u32,
4244
}
4345

44-
impl FlatBufferBuilder {
45-
pub fn new(list_count: usize) -> Self {
46-
Self {
47-
lists: vec![FilterListBuilder::default(); list_count],
48-
unique_domains_hashes: vec![],
49-
unique_domains_hashes_map: HashMap::new(),
50-
index: 0,
51-
}
52-
}
53-
54-
fn get_or_insert_unique_domain_hash(&mut self, h: &Hash) -> u32 {
46+
impl<'a> EngineFlatBuilder<'a> {
47+
pub fn get_or_insert_unique_domain_hash(&mut self, h: &Hash) -> u32 {
5548
if let Some(&index) = self.unique_domains_hashes_map.get(h) {
5649
return index;
5750
}
@@ -61,33 +54,64 @@ impl FlatBufferBuilder {
6154
index
6255
}
6356

64-
pub fn add_filter(&mut self, network_filter: NetworkFilter, list_id: u32) {
65-
self.lists[list_id as usize].filters.push(network_filter);
57+
pub fn finish(
58+
&mut self,
59+
network_rules: WIPFlatVec<'a, NetworkFilterListBuilder, EngineFlatBuilder<'a>>,
60+
) -> VerifiedFlatbufferMemory {
61+
let unique_domains_hashes =
62+
Some(self.fb_builder.create_vector(&self.unique_domains_hashes));
63+
let network_rules = Some(network_rules);
64+
let engine = fb::Engine::create(
65+
self.raw_builder(),
66+
&fb::EngineArgs {
67+
network_rules,
68+
unique_domains_hashes,
69+
},
70+
);
71+
self.raw_builder().finish(engine, None);
72+
VerifiedFlatbufferMemory::from_builder(self.raw_builder())
6673
}
74+
}
6775

68-
fn write_filter<'a>(
69-
&mut self,
70-
builder: &mut flatbuffers::FlatBufferBuilder<'a>,
76+
impl<'a> FlatBuilder<'a> for EngineFlatBuilder<'a> {
77+
fn create_string(&mut self, s: &str) -> WIPOffset<&'a str> {
78+
self.fb_builder.create_string(s)
79+
}
80+
81+
fn raw_builder(&mut self) -> &mut flatbuffers::FlatBufferBuilder<'a> {
82+
&mut self.fb_builder
83+
}
84+
}
85+
86+
struct NetworkRulesBuilder {
87+
lists: Vec<NetworkFilterListBuilder>,
88+
}
89+
90+
impl<'a> FlatSerialize<'a, EngineFlatBuilder<'a>> for &NetworkFilter {
91+
type Output = WIPOffset<fb::NetworkFilter<'a>>;
92+
93+
fn serialize(
7194
network_filter: &NetworkFilter,
95+
builder: &mut EngineFlatBuilder<'a>,
7296
) -> WIPOffset<fb::NetworkFilter<'a>> {
7397
let opt_domains = network_filter.opt_domains.as_ref().map(|v| {
7498
let mut o: Vec<u32> = v
7599
.iter()
76-
.map(|x| self.get_or_insert_unique_domain_hash(x))
100+
.map(|x| builder.get_or_insert_unique_domain_hash(x))
77101
.collect();
78102
o.sort_unstable();
79103
o.dedup();
80-
builder.create_vector(&o)
104+
FlatSerialize::serialize(o, builder)
81105
});
82106

83107
let opt_not_domains = network_filter.opt_not_domains.as_ref().map(|v| {
84108
let mut o: Vec<u32> = v
85109
.iter()
86-
.map(|x| self.get_or_insert_unique_domain_hash(x))
110+
.map(|x| builder.get_or_insert_unique_domain_hash(x))
87111
.collect();
88112
o.sort_unstable();
89113
o.dedup();
90-
builder.create_vector(&o)
114+
FlatSerialize::serialize(o, builder)
91115
});
92116

93117
let modifier_option = network_filter
@@ -111,7 +135,7 @@ impl FlatBufferBuilder {
111135
.iter()
112136
.map(|s| builder.create_string(s))
113137
.collect();
114-
Some(builder.create_vector(&offsets))
138+
Some(FlatSerialize::serialize(offsets, builder))
115139
} else {
116140
None
117141
};
@@ -121,8 +145,8 @@ impl FlatBufferBuilder {
121145
.as_ref()
122146
.map(|v| builder.create_string(v.as_str()));
123147

124-
let filter = fb::NetworkFilter::create(
125-
builder,
148+
let network_filter = fb::NetworkFilter::create(
149+
&mut builder.fb_builder,
126150
&fb::NetworkFilterArgs {
127151
mask: network_filter.mask.bits(),
128152
patterns,
@@ -135,53 +159,32 @@ impl FlatBufferBuilder {
135159
},
136160
);
137161

138-
self.index += 1;
139-
140-
filter
162+
network_filter
141163
}
164+
}
142165

143-
pub fn finish(&mut self, optimize: bool) -> VerifiedFlatbufferMemory {
144-
let mut builder = flatbuffers::FlatBufferBuilder::new();
145-
let mut flat_network_rules = vec![];
146-
147-
let lists = std::mem::take(&mut self.lists);
148-
for (list_id, list) in lists.into_iter().enumerate() {
149-
// Don't optimize removeparam, since it can fuse filters without respecting distinct
150-
let optimize = optimize && list_id != NetworkFilterListId::RemoveParam as usize;
151-
152-
flat_network_rules.push(self.write_filter_list(&mut builder, list.filters, optimize));
166+
impl NetworkFilterListBuilder {
167+
fn new(optimize: bool) -> Self {
168+
Self {
169+
filters: vec![],
170+
optimize,
153171
}
154-
155-
// Create vectors first to avoid simultaneous mutable borrows of `builder`.
156-
let network_rules = builder.create_vector(&flat_network_rules);
157-
let unique_vec = builder.create_vector(&self.unique_domains_hashes);
158-
159-
let root = fb::Engine::create(
160-
&mut builder,
161-
&fb::EngineArgs {
162-
network_rules: Some(network_rules),
163-
unique_domains_hashes: Some(unique_vec),
164-
},
165-
);
166-
167-
builder.finish(root, None);
168-
169-
// TODO: consider using builder.collapse() to avoid reallocating memory.
170-
VerifiedFlatbufferMemory::from_builder(&builder)
171172
}
173+
}
172174

173-
pub fn write_filter_list<'a>(
174-
&mut self,
175-
builder: &mut flatbuffers::FlatBufferBuilder<'a>,
176-
filters: Vec<NetworkFilter>,
177-
optimize: bool,
175+
impl<'a> FlatSerialize<'a, EngineFlatBuilder<'a>> for NetworkFilterListBuilder {
176+
type Output = WIPOffset<fb::NetworkFilterList<'a>>;
177+
fn serialize(
178+
rule_list: Self,
179+
builder: &mut EngineFlatBuilder<'a>,
178180
) -> WIPOffset<fb::NetworkFilterList<'a>> {
179181
let mut filter_map = HashMap::<ShortHash, Vec<WIPOffset<fb::NetworkFilter<'a>>>>::new();
180182

181183
let mut optimizable = HashMap::<ShortHash, Vec<NetworkFilter>>::new();
182184

183185
// Compute tokens for all filters
184-
let filter_tokens: Vec<_> = filters
186+
let filter_tokens: Vec<_> = rule_list
187+
.filters
185188
.into_iter()
186189
.map(|filter| {
187190
let tokens = filter.get_tokens();
@@ -193,11 +196,11 @@ impl FlatBufferBuilder {
193196
let (total_number_of_tokens, tokens_histogram) = token_histogram(&filter_tokens);
194197

195198
{
196-
for (network_filter, multi_tokens) in filter_tokens {
197-
let flat_filter = if !optimize
199+
for (network_filter, multi_tokens) in filter_tokens.into_iter() {
200+
let flat_filter = if !rule_list.optimize
198201
|| !optimizer::is_filter_optimizable_by_patterns(&network_filter)
199202
{
200-
Some(self.write_filter(builder, &network_filter))
203+
Some(FlatSerialize::serialize(&network_filter, builder))
201204
} else {
202205
None
203206
};
@@ -232,7 +235,7 @@ impl FlatBufferBuilder {
232235
}
233236
}
234237

235-
if optimize {
238+
if rule_list.optimize {
236239
// Sort the entries to ensure deterministic iteration order
237240
let mut optimizable_entries: Vec<_> = optimizable.drain().collect();
238241
optimizable_entries.sort_unstable_by_key(|(token, _)| *token);
@@ -241,7 +244,7 @@ impl FlatBufferBuilder {
241244
let optimized = optimizer::optimize(v);
242245

243246
for filter in optimized {
244-
let flat_filter = self.write_filter(builder, &filter);
247+
let flat_filter = FlatSerialize::serialize(&filter, builder);
245248
filter_map.entry(token).or_default().push(flat_filter);
246249
}
247250
}
@@ -252,47 +255,38 @@ impl FlatBufferBuilder {
252255
);
253256
}
254257

255-
let len = filter_map.len();
256-
257-
// Convert filter_map keys to a sorted vector of (hash, filter_indices).
258-
let mut entries: Vec<_> = filter_map.drain().collect();
259-
entries.sort_unstable_by_key(|(k, _)| *k);
260-
261-
// Convert sorted_entries to two flatbuffers vectors.
262-
let mut flat_index: Vec<ShortHash> = Vec::with_capacity(len);
263-
let mut flat_values: Vec<_> = Vec::with_capacity(len);
264-
for (key, filter_indices) in entries {
265-
for &filter_index in &filter_indices {
266-
flat_index.push(key);
267-
flat_values.push(filter_index);
268-
}
269-
}
270-
271-
let filter_map_index = builder.create_vector(&flat_index);
272-
let filter_map_values = builder.create_vector(&flat_values);
258+
let flat_filter_map_builder = FlatMultiMapBuilder::from_filter_map(filter_map);
259+
let flat_filter_map = FlatMultiMapBuilder::finish(flat_filter_map_builder, builder);
273260

274261
fb::NetworkFilterList::create(
275-
builder,
262+
builder.raw_builder(),
276263
&fb::NetworkFilterListArgs {
277-
filter_map_index: Some(filter_map_index),
278-
filter_map_values: Some(filter_map_values),
264+
filter_map_index: Some(flat_filter_map.keys),
265+
filter_map_values: Some(flat_filter_map.values),
279266
},
280267
)
281268
}
269+
}
282270

283-
pub fn make_flatbuffer(
284-
network_filters: Vec<NetworkFilter>,
285-
optimize: bool,
286-
) -> VerifiedFlatbufferMemory {
287-
type FilterId = NetworkFilterListId;
288-
let mut builder = FlatBufferBuilder::new(FilterId::Size as usize);
271+
impl NetworkRulesBuilder {
272+
pub fn from_rules(network_filters: Vec<NetworkFilter>, optimize: bool) -> Self {
273+
let mut lists = vec![];
274+
for list_id in 0..NetworkFilterListId::Size as usize {
275+
// Don't optimize removeparam, since it can fuse filters without respecting distinct
276+
let optimize = optimize && list_id != NetworkFilterListId::RemoveParam as usize;
277+
lists.push(NetworkFilterListBuilder::new(optimize));
278+
}
279+
let mut self_ = Self { lists };
289280

290281
let mut badfilter_ids: HashSet<Hash> = HashSet::new();
282+
283+
// Collect badfilter ids in advance.
291284
for filter in network_filters.iter() {
292285
if filter.is_badfilter() {
293286
badfilter_ids.insert(filter.get_id_without_badfilter());
294287
}
295288
}
289+
296290
for filter in network_filters.into_iter() {
297291
// skip any bad filters
298292
let filter_id = filter.get_id();
@@ -302,8 +296,9 @@ impl FlatBufferBuilder {
302296

303297
// Redirects are independent of blocking behavior.
304298
if filter.is_redirect() {
305-
builder.add_filter(filter.clone(), FilterId::Redirects as u32);
299+
self_.add_filter(filter.clone(), NetworkFilterListId::Redirects);
306300
}
301+
type FilterId = NetworkFilterListId;
307302

308303
let list_id: FilterId = if filter.is_csp() {
309304
FilterId::Csp
@@ -326,9 +321,30 @@ impl FlatBufferBuilder {
326321
continue;
327322
};
328323

329-
builder.add_filter(filter, list_id as u32);
324+
self_.add_filter(filter, list_id);
330325
}
331326

332-
builder.finish(optimize)
327+
self_
328+
}
329+
330+
fn add_filter(&mut self, network_filter: NetworkFilter, list_id: NetworkFilterListId) {
331+
self.lists[list_id as usize].filters.push(network_filter);
333332
}
334333
}
334+
335+
impl<'a> FlatSerialize<'a, EngineFlatBuilder<'a>> for NetworkRulesBuilder {
336+
type Output = WIPFlatVec<'a, NetworkFilterListBuilder, EngineFlatBuilder<'a>>;
337+
fn serialize(value: Self, builder: &mut EngineFlatBuilder<'a>) -> Self::Output {
338+
FlatSerialize::serialize(value.lists, builder)
339+
}
340+
}
341+
342+
pub fn make_flatbuffer(
343+
network_filters: Vec<NetworkFilter>,
344+
optimize: bool,
345+
) -> VerifiedFlatbufferMemory {
346+
let mut builder = EngineFlatBuilder::default();
347+
let network_rules_builder = NetworkRulesBuilder::from_rules(network_filters, optimize);
348+
let network_rules = FlatSerialize::serialize(network_rules_builder, &mut builder);
349+
builder.finish(network_rules)
350+
}

0 commit comments

Comments
 (0)