Skip to content

Commit 33cdf69

Browse files
authored
Store tagged filters into the flatbuffer (#481)
1 parent fce2a75 commit 33cdf69

File tree

6 files changed

+14
-96
lines changed

6 files changed

+14
-96
lines changed

src/blocker.rs

Lines changed: 6 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -71,14 +71,13 @@ pub struct Blocker {
7171
pub(crate) importants: NetworkFilterList,
7272
pub(crate) redirects: NetworkFilterList,
7373
pub(crate) removeparam: NetworkFilterList,
74-
pub(crate) filters_tagged: NetworkFilterList,
7574
pub(crate) filters: NetworkFilterList,
7675
pub(crate) generic_hide: NetworkFilterList,
7776

7877
// Enabled tags are not serialized - when deserializing, tags of the existing
7978
// instance (the one we are recreating lists into) are maintained
8079
pub(crate) tags_enabled: HashSet<String>,
81-
pub(crate) tagged_filters_all: Vec<NetworkFilter>,
80+
pub(crate) tagged_filters_all: NetworkFilterList,
8281

8382
pub(crate) enable_optimizations: bool,
8483

@@ -144,7 +143,7 @@ impl Blocker {
144143

145144
// only check the rest of the rules if not previously matched
146145
let filter = if important_filter.is_none() && !matched_rule {
147-
self.filters_tagged
146+
self.tagged_filters_all
148147
.check(request, &self.tags_enabled, &mut regex_manager)
149148
.or_else(|| self.filters.check(request, &NO_TAGS, &mut regex_manager))
150149
} else {
@@ -459,8 +458,6 @@ impl Blocker {
459458
}
460459
}
461460

462-
tagged_filters_all.shrink_to_fit();
463-
464461
Self {
465462
csp: NetworkFilterList::new(csp, options.enable_optimizations),
466463
exceptions: NetworkFilterList::new(exceptions, options.enable_optimizations),
@@ -469,12 +466,14 @@ impl Blocker {
469466
// Don't optimize removeparam, since it can fuse filters without respecting distinct
470467
// queryparam values
471468
removeparam: NetworkFilterList::new(removeparam, false),
472-
filters_tagged: NetworkFilterList::new(Vec::new(), options.enable_optimizations),
473469
filters: NetworkFilterList::new(filters, options.enable_optimizations),
474470
generic_hide: NetworkFilterList::new(generic_hide, options.enable_optimizations),
475471
// Tags special case for enabling/disabling them dynamically
476472
tags_enabled: HashSet::new(),
477-
tagged_filters_all,
473+
tagged_filters_all: NetworkFilterList::new(
474+
tagged_filters_all,
475+
options.enable_optimizations,
476+
),
478477
// Options
479478
enable_optimizations: options.enable_optimizations,
480479
regex_manager: Default::default(),
@@ -508,13 +507,6 @@ impl Blocker {
508507

509508
fn tags_with_set(&mut self, tags_enabled: HashSet<String>) {
510509
self.tags_enabled = tags_enabled;
511-
let filters: Vec<NetworkFilter> = self
512-
.tagged_filters_all
513-
.iter()
514-
.filter(|n| n.tag.is_some() && self.tags_enabled.contains(n.tag.as_ref().unwrap()))
515-
.cloned()
516-
.collect();
517-
self.filters_tagged = NetworkFilterList::new(filters, self.enable_optimizations);
518510
}
519511

520512
pub fn tags_enabled(&self) -> Vec<String> {

src/data_format/storage.rs

Lines changed: 4 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -196,8 +196,6 @@ struct NetworkFilterSerializeFmt<'a> {
196196
tag: &'a Option<String>,
197197
raw_line: Option<String>,
198198
id: &'a crate::utils::Hash,
199-
opt_domains_union: &'a Option<crate::utils::Hash>,
200-
opt_not_domains_union: &'a Option<crate::utils::Hash>,
201199
}
202200

203201
/// Generic over `Borrow<NetworkFilter>` because `tagged_filters_all` requires `&'a NetworkFilter`
@@ -228,8 +226,6 @@ where
228226
tag: &v.tag,
229227
raw_line: v.raw_line.as_ref().map(|raw| *raw.clone()),
230228
id: &v.id,
231-
opt_domains_union: &v.opt_domains_union,
232-
opt_not_domains_union: &v.opt_not_domains_union,
233229
}
234230
}
235231
}
@@ -252,17 +248,6 @@ where
252248
storage_list.serialize(s)
253249
}
254250

255-
/// Forces a `NetworkFilter` slice to be serialized by converting to
256-
/// an intermediate representation that is constructed with `NetworkFilterFmt` instead.
257-
fn serialize_storage_network_filter_vec<S>(vec: &[NetworkFilter], s: S) -> Result<S::Ok, S::Error>
258-
where
259-
S: serde::Serializer,
260-
{
261-
let storage_vec: Vec<_> = vec.iter().map(NetworkFilterSerializeFmt::from).collect();
262-
263-
storage_vec.serialize(s)
264-
}
265-
266251
/// Provides structural aggregation of referenced adblock engine data to allow for allocation-free
267252
/// serialization.
268253
#[derive(Serialize)]
@@ -276,14 +261,12 @@ pub(crate) struct SerializeFormat<'a> {
276261
#[serde(serialize_with = "serialize_network_filter_list")]
277262
redirects: &'a NetworkFilterList,
278263
#[serde(serialize_with = "serialize_network_filter_list")]
279-
filters_tagged: &'a NetworkFilterList,
280-
#[serde(serialize_with = "serialize_network_filter_list")]
281264
filters: &'a NetworkFilterList,
282265
#[serde(serialize_with = "serialize_network_filter_list")]
283266
generic_hide: &'a NetworkFilterList,
284267

285-
#[serde(serialize_with = "serialize_storage_network_filter_vec")]
286-
tagged_filters_all: &'a Vec<NetworkFilter>,
268+
#[serde(serialize_with = "serialize_network_filter_list")]
269+
tagged_filters_all: &'a NetworkFilterList,
287270

288271
enable_optimizations: bool,
289272

@@ -320,42 +303,6 @@ impl SerializeFormat<'_> {
320303
}
321304
}
322305

323-
/// `_bug` is no longer used, and is cleaned up from future format versions.
324-
#[derive(Debug, Clone, Deserialize)]
325-
pub(crate) struct NetworkFilterDeserializeFmt {
326-
pub mask: crate::filters::network::NetworkFilterMask,
327-
pub filter: crate::filters::network::FilterPart,
328-
pub opt_domains: Option<Vec<crate::utils::Hash>>,
329-
pub opt_not_domains: Option<Vec<crate::utils::Hash>>,
330-
pub redirect: Option<String>,
331-
pub hostname: Option<String>,
332-
pub csp: Option<String>,
333-
_bug: Option<u32>,
334-
pub tag: Option<String>,
335-
pub raw_line: Option<String>,
336-
pub id: crate::utils::Hash,
337-
pub opt_domains_union: Option<crate::utils::Hash>,
338-
pub opt_not_domains_union: Option<crate::utils::Hash>,
339-
}
340-
341-
impl From<NetworkFilterDeserializeFmt> for NetworkFilter {
342-
fn from(v: NetworkFilterDeserializeFmt) -> Self {
343-
Self {
344-
mask: v.mask,
345-
filter: v.filter,
346-
opt_domains: v.opt_domains,
347-
opt_not_domains: v.opt_not_domains,
348-
modifier_option: v.redirect.or(v.csp),
349-
hostname: v.hostname,
350-
tag: v.tag,
351-
raw_line: v.raw_line.map(Box::new),
352-
id: v.id,
353-
opt_domains_union: v.opt_domains_union,
354-
opt_not_domains_union: v.opt_not_domains_union,
355-
}
356-
}
357-
}
358-
359306
#[derive(Debug, Deserialize, Default)]
360307
pub(crate) struct NetworkFilterListDeserializeFmt {
361308
pub flatbuffer_memory: Vec<u8>,
@@ -379,11 +326,10 @@ pub(crate) struct DeserializeFormat {
379326
exceptions: NetworkFilterListDeserializeFmt,
380327
importants: NetworkFilterListDeserializeFmt,
381328
redirects: NetworkFilterListDeserializeFmt,
382-
filters_tagged: NetworkFilterListDeserializeFmt,
383329
filters: NetworkFilterListDeserializeFmt,
384330
generic_hide: NetworkFilterListDeserializeFmt,
385331

386-
tagged_filters_all: Vec<NetworkFilterDeserializeFmt>,
332+
tagged_filters_all: NetworkFilterListDeserializeFmt,
387333

388334
enable_optimizations: bool,
389335

@@ -422,7 +368,6 @@ impl<'a> From<(&'a Blocker, &'a CosmeticFilterCache)> for SerializeFormat<'a> {
422368
exceptions: &blocker.exceptions,
423369
importants: &blocker.importants,
424370
redirects: &blocker.redirects,
425-
filters_tagged: &blocker.filters_tagged,
426371
filters: &blocker.filters,
427372
generic_hide: &blocker.generic_hide,
428373

@@ -465,12 +410,11 @@ impl TryFrom<DeserializeFormat> for (Blocker, CosmeticFilterCache) {
465410
importants: v.importants.try_into()?,
466411
redirects: v.redirects.try_into()?,
467412
removeparam: NetworkFilterList::default(),
468-
filters_tagged: v.filters_tagged.try_into()?,
469413
filters: v.filters.try_into()?,
470414
generic_hide: v.generic_hide.try_into()?,
471415

472416
tags_enabled: Default::default(),
473-
tagged_filters_all: v.tagged_filters_all.into_iter().map(|f| f.into()).collect(),
417+
tagged_filters_all: v.tagged_filters_all.try_into()?,
474418

475419
enable_optimizations: v.enable_optimizations,
476420
regex_manager: Default::default(),

src/filters/network.rs

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -385,10 +385,6 @@ pub struct NetworkFilter {
385385
pub raw_line: Option<Box<String>>,
386386

387387
pub id: Hash,
388-
389-
// All domain option values (their hashes) OR'ed together to quickly dismiss mis-matches
390-
pub opt_domains_union: Option<Hash>,
391-
pub opt_not_domains_union: Option<Hash>,
392388
}
393389

394390
// TODO - restrict the API so that this is always true - i.e. lazy-calculate IDs from actual data,
@@ -453,8 +449,6 @@ impl NetworkFilter {
453449

454450
let mut opt_domains: Option<Vec<Hash>> = None;
455451
let mut opt_not_domains: Option<Vec<Hash>> = None;
456-
let mut opt_domains_union: Option<Hash> = None;
457-
let mut opt_not_domains_union: Option<Hash> = None;
458452

459453
let mut modifier_option: Option<String> = None;
460454
let mut tag: Option<String> = None;
@@ -497,14 +491,10 @@ impl NetworkFilter {
497491

498492
if !opt_domains_array.is_empty() {
499493
opt_domains_array.sort_unstable();
500-
opt_domains_union =
501-
Some(opt_domains_array.iter().fold(0, |acc, x| acc | x));
502494
opt_domains = Some(opt_domains_array);
503495
}
504496
if !opt_not_domains_array.is_empty() {
505497
opt_not_domains_array.sort_unstable();
506-
opt_not_domains_union =
507-
Some(opt_not_domains_array.iter().fold(0, |acc, x| acc | x));
508498
opt_not_domains = Some(opt_not_domains_array);
509499
}
510500
}
@@ -822,8 +812,6 @@ impl NetworkFilter {
822812
},
823813
modifier_option,
824814
id: utils::fast_hash(line),
825-
opt_domains_union,
826-
opt_not_domains_union,
827815
})
828816
}
829817

src/optimizer.rs

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -167,9 +167,7 @@ impl Optimization for UnionDomainGroup {
167167
if !domains.is_empty() {
168168
let mut domains = domains.into_iter().cloned().collect::<Vec<_>>();
169169
domains.sort_unstable();
170-
let opt_domains_union = Some(domains.iter().fold(0, |acc, x| acc | x));
171170
filter.opt_domains = Some(domains);
172-
filter.opt_domains_union = opt_domains_union;
173171
}
174172
if !not_domains.is_empty() {
175173
let mut domains = not_domains.into_iter().cloned().collect::<Vec<_>>();

tests/unit/blocker.rs

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1145,7 +1145,6 @@ mod blocker_tests {
11451145
blocker.tags_enabled,
11461146
HashSet::from_iter([String::from("stuff")].into_iter())
11471147
);
1148-
assert_eq!(blocker.filters_tagged.get_filter_map().total_size(), 2);
11491148

11501149
request_expectations
11511150
.into_iter()
@@ -1200,7 +1199,6 @@ mod blocker_tests {
12001199
blocker.tags_enabled,
12011200
HashSet::from_iter([String::from("brian"), String::from("stuff")].into_iter())
12021201
);
1203-
assert_eq!(blocker.filters_tagged.get_filter_map().total_size(), 4);
12041202

12051203
request_expectations
12061204
.into_iter()
@@ -1254,13 +1252,11 @@ mod blocker_tests {
12541252
blocker.tags_enabled,
12551253
HashSet::from_iter([String::from("brian"), String::from("stuff")].into_iter())
12561254
);
1257-
assert_eq!(blocker.filters_tagged.get_filter_map().total_size(), 4);
12581255
blocker.disable_tags(&["stuff"]);
12591256
assert_eq!(
12601257
blocker.tags_enabled,
12611258
HashSet::from_iter([String::from("brian")].into_iter())
12621259
);
1263-
assert_eq!(blocker.filters_tagged.get_filter_map().total_size(), 2);
12641260

12651261
request_expectations
12661262
.into_iter()

tests/unit/engine.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,7 @@ mod tests {
183183
fn deserialization_generate_simple() {
184184
let mut engine = Engine::from_rules(["ad-banner"], Default::default());
185185
let data = engine.serialize().unwrap();
186-
assert_eq!(hash(&data), 867372640370260034, "{}", HASH_MISSMATCH_MSG);
186+
assert_eq!(hash(&data), 5723845290597955159, "{}", HASH_MISSMATCH_MSG);
187187
engine.deserialize(&data).unwrap();
188188
}
189189

@@ -192,7 +192,7 @@ mod tests {
192192
let mut engine = Engine::from_rules(["ad-banner$tag=abc"], Default::default());
193193
engine.use_tags(&["abc"]);
194194
let data = engine.serialize().unwrap();
195-
assert_eq!(hash(&data), 13055424859571526788, "{}", HASH_MISSMATCH_MSG);
195+
assert_eq!(hash(&data), 9626816743810307798, "{}", HASH_MISSMATCH_MSG);
196196
engine.deserialize(&data).unwrap();
197197
}
198198

@@ -217,9 +217,9 @@ mod tests {
217217
let data = engine.serialize().unwrap();
218218

219219
let expected_hash = if cfg!(feature = "css-validation") {
220-
11273622377918966036
220+
6718506180720782170
221221
} else {
222-
3151397840865371565
222+
6839468684492187294
223223
};
224224

225225
assert_eq!(hash(&data), expected_hash, "{}", HASH_MISSMATCH_MSG);

0 commit comments

Comments
 (0)