diff --git a/benches/bench_memory.rs b/benches/bench_memory.rs index 695415e6..a5b9b547 100644 --- a/benches/bench_memory.rs +++ b/benches/bench_memory.rs @@ -149,9 +149,10 @@ fn bench_memory_usage(c: &mut Criterion) { ALLOCATOR.reset(); let rules = rules_from_lists(&["data/brave/brave-main-list.txt"]); let mut engine = Engine::from_rules(rules, Default::default()); - let resource_json = + let mut resource_json = std::fs::read_to_string("data/brave/brave-resources.json").unwrap(); - let resource_list: Vec = serde_json::from_str(&resource_json).unwrap(); + let resource_list: Vec = + serde_json::from_str(&std::mem::take(&mut resource_json)).unwrap(); engine.use_resources(resource_list); if run_requests { diff --git a/benches/bench_rules.rs b/benches/bench_rules.rs index f5b850ef..4136bbde 100644 --- a/benches/bench_rules.rs +++ b/benches/bench_rules.rs @@ -100,7 +100,7 @@ fn blocker_new(c: &mut Criterion) { .collect(); let brave_list_rules: Vec<_> = rules_from_lists(&["data/brave/brave-main-list.txt"]).collect(); let engine = Engine::from_rules(&brave_list_rules, Default::default()); - let engine_serialized = engine.serialize().unwrap(); + let engine_serialized = engine.serialize().to_vec(); group.bench_function("el+ep", move |b| b.iter(|| get_engine(&easylist_rules))); group.bench_function("brave-list", move |b| { diff --git a/benches/bench_serialization.rs b/benches/bench_serialization.rs index 8076eb5f..ff84d17a 100644 --- a/benches/bench_serialization.rs +++ b/benches/bench_serialization.rs @@ -18,19 +18,19 @@ fn serialization(c: &mut Criterion) { ]); let engine = Engine::from_rules(full_rules, Default::default()); - b.iter(|| assert!(!engine.serialize().unwrap().is_empty())) + b.iter(|| assert!(!engine.serialize().to_vec().is_empty())) }); group.bench_function("el", move |b| { let full_rules = rules_from_lists(&["data/easylist.to/easylist/easylist.txt"]); let engine = Engine::from_rules(full_rules, Default::default()); - b.iter(|| 
assert!(!engine.serialize().unwrap().is_empty())) + b.iter(|| assert!(!engine.serialize().to_vec().is_empty())) }); group.bench_function("slimlist", move |b| { let full_rules = rules_from_lists(&["data/slim-list.txt"]); let engine = Engine::from_rules(full_rules, Default::default()); - b.iter(|| assert!(!engine.serialize().unwrap().is_empty())) + b.iter(|| assert!(!engine.serialize().to_vec().is_empty())) }); group.finish(); @@ -48,7 +48,7 @@ fn deserialization(c: &mut Criterion) { ]); let engine = Engine::from_rules(full_rules, Default::default()); - let serialized = engine.serialize().unwrap(); + let serialized = engine.serialize().to_vec(); b.iter(|| { let mut deserialized = Engine::default(); @@ -59,7 +59,7 @@ fn deserialization(c: &mut Criterion) { let full_rules = rules_from_lists(&["data/easylist.to/easylist/easylist.txt"]); let engine = Engine::from_rules(full_rules, Default::default()); - let serialized = engine.serialize().unwrap(); + let serialized = engine.serialize().to_vec(); b.iter(|| { let mut deserialized = Engine::default(); @@ -70,7 +70,7 @@ fn deserialization(c: &mut Criterion) { let full_rules = rules_from_lists(&["data/slim-list.txt"]); let engine = Engine::from_rules(full_rules, Default::default()); - let serialized = engine.serialize().unwrap(); + let serialized = engine.serialize().to_vec(); b.iter(|| { let mut deserialized = Engine::default(); diff --git a/examples/generate-dat.rs b/examples/generate-dat.rs index 86845c5f..513db3f7 100644 --- a/examples/generate-dat.rs +++ b/examples/generate-dat.rs @@ -21,7 +21,7 @@ fn main() { ) .unwrap(); assert!(engine.check_network_request(&request).exception.is_some()); - let serialized = engine.serialize().expect("Could not serialize!"); + let serialized = engine.serialize().to_vec(); // Write to file let mut file = File::create("engine.dat").expect("Could not create serialization file"); diff --git a/js/src/lib.rs b/js/src/lib.rs index 7396162a..ee8a7add 100644 --- a/js/src/lib.rs +++ 
b/js/src/lib.rs @@ -248,7 +248,7 @@ fn engine_url_cosmetic_resources(mut cx: FunctionContext) -> JsResult { fn engine_serialize(mut cx: FunctionContext) -> JsResult { let this = cx.argument::>(0)?; let serialized = if let Ok(engine) = this.0.lock() { - engine.serialize().unwrap() + engine.serialize().to_vec() } else { cx.throw_error("Failed to acquire lock on engine")? }; diff --git a/src/blocker.rs b/src/blocker.rs index fe31fb18..0145a5df 100644 --- a/src/blocker.rs +++ b/src/blocker.rs @@ -440,11 +440,15 @@ impl Blocker { network_filters: Vec, options: &BlockerOptions, ) -> Self { - use crate::filters::fb_builder::FlatBufferBuilder; + use crate::filters::fb_builder::make_flatbuffer_from_rules; use crate::filters::fb_network::FilterDataContext; - let memory = - FlatBufferBuilder::make_flatbuffer(network_filters, options.enable_optimizations); + let memory = make_flatbuffer_from_rules( + network_filters, + vec![], // no cosmetic filters for blocker test + options.enable_optimizations, + 0, + ); let filter_data_context = FilterDataContext::new(memory); Self::from_context(filter_data_context) } diff --git a/src/cosmetic_filter_cache.rs b/src/cosmetic_filter_cache.rs index aef3bff7..9d93d550 100644 --- a/src/cosmetic_filter_cache.rs +++ b/src/cosmetic_filter_cache.rs @@ -11,14 +11,47 @@ use crate::filters::cosmetic::{ CosmeticFilter, CosmeticFilterAction, CosmeticFilterMask, CosmeticFilterOperator, }; +use crate::filters::fb_network::flat::fb; +use crate::filters::fb_network::FilterDataContextRef; +use crate::flatbuffers::containers::flat_multimap::{ + FlatMapBuilder, FlatMapStringView, FlatMapView, FlatMultiMapBuilder, FlatMultiMapView, +}; + +use crate::flatbuffers::containers::flat_serialize::{serialize_vec_opt, Builder, FlatSerialize}; +use crate::flatbuffers::containers::flat_set::FlatSetView; + use crate::resources::{PermissionMask, ResourceStorage}; use crate::utils::Hash; use std::collections::{HashMap, HashSet}; +use flatbuffers::WIPOffset; use 
memchr::memchr as find_char; use serde::{Deserialize, Serialize}; +/// Encodes permission bits in the first byte of a script string +/// Returns the script with permission byte prepended +fn encode_script_with_permission(script: String, permission: PermissionMask) -> String { + let mut encoded = String::with_capacity(script.len() + 1); + encoded.push(permission.to_bits() as char); + encoded.push_str(&script); + encoded +} + +/// Decodes permission bits from the first byte of a script string +/// Returns (permission, script) tuple +fn decode_script_with_permission(encoded_script: &str) -> (PermissionMask, &str) { + if encoded_script.is_empty() { + return (PermissionMask::default(), encoded_script); + } + + let first_char = encoded_script.chars().next().unwrap(); + let permission_bits = first_char as u8; + let permission = PermissionMask::from_bits(permission_bits); + let script = &encoded_script[first_char.len_utf8()..]; + (permission, script) +} + /// Contains cosmetic filter information intended to be used on a particular URL. #[derive(Debug, PartialEq, Eq, Deserialize, Serialize)] pub struct UrlSpecificResources { @@ -63,49 +96,61 @@ impl UrlSpecificResources { /// will be blocked on any particular page, although when used correctly, all provided rules and /// scriptlets should be safe to apply. pub(crate) struct CosmeticFilterCache { - /// Rules that are just the CSS class of an element to be hidden on all sites, e.g. `##.ad`. - pub(crate) simple_class_rules: HashSet, - /// Rules that are just the CSS id of an element to be hidden on all sites, e.g. `###banner`. - pub(crate) simple_id_rules: HashSet, - /// Rules that are the CSS selector of an element to be hidden on all sites, starting with a - /// class, e.g. `##.ad image`. - pub(crate) complex_class_rules: HashMap>, - /// Rules that are the CSS selector of an element to be hidden on all sites, starting with an - /// id, e.g. `###banner > .text a`. 
- pub(crate) complex_id_rules: HashMap>, - - pub(crate) specific_rules: HostnameRuleDb, - - /// Rules that are the CSS selector of an element to be hidden on all sites that do not fit - /// into any of the class or id buckets above, e.g. `##a[href="https://malware.com"]` - pub(crate) misc_generic_selectors: HashSet, + filter_data_context: FilterDataContextRef, } -impl CosmeticFilterCache { - pub fn new() -> Self { - Self { - simple_class_rules: HashSet::new(), - simple_id_rules: HashSet::new(), - complex_class_rules: HashMap::new(), - complex_id_rules: HashMap::new(), - - specific_rules: HostnameRuleDb::default(), +/// Accumulates hostname-specific rules for a single domain before building HostnameSpecificRules +/// Note: hide and inject_script are now handled separately at the top level +#[derive(Default)] +struct HostnameRule { + unhide: Vec, + uninject_script: Vec, + procedural_action: Vec, + procedural_action_exception: Vec, +} - misc_generic_selectors: HashSet::new(), - } +impl<'a, B: Builder<'a>> FlatSerialize<'a, B> for HostnameRule { + type Output = WIPOffset>; + + fn serialize( + value: Self, + builder: &mut B, + ) -> flatbuffers::WIPOffset> { + let unhide = serialize_vec_opt(value.unhide, builder); + let uninject_script = serialize_vec_opt(value.uninject_script, builder); + let procedural_action = serialize_vec_opt(value.procedural_action, builder); + let procedural_action_exception = + serialize_vec_opt(value.procedural_action_exception, builder); + + fb::HostnameSpecificRules::create( + builder.raw_builder(), + &fb::HostnameSpecificRulesArgs { + unhide, + uninject_script, + procedural_action, + procedural_action_exception, + }, + ) } +} - pub fn from_rules(rules: Vec) -> Self { - let mut self_ = Self { - simple_class_rules: HashSet::with_capacity(rules.len() / 2), - simple_id_rules: HashSet::with_capacity(rules.len() / 2), - complex_class_rules: HashMap::with_capacity(rules.len() / 2), - complex_id_rules: HashMap::with_capacity(rules.len() / 2), 
+#[derive(Default)] +pub(crate) struct CosmeticFilterCacheBuilder { + simple_class_rules: HashSet, + simple_id_rules: HashSet, + misc_generic_selectors: HashSet, + complex_class_rules: FlatMultiMapBuilder, + complex_id_rules: FlatMultiMapBuilder, - specific_rules: HostnameRuleDb::default(), + hostname_hide: FlatMultiMapBuilder, + hostname_inject_script: FlatMultiMapBuilder, - misc_generic_selectors: HashSet::with_capacity(rules.len() / 30), - }; + specific_rules: HashMap, +} + +impl CosmeticFilterCacheBuilder { + pub fn from_rules(rules: Vec) -> Self { + let mut self_ = Self::default(); for rule in rules { self_.add_filter(rule) @@ -119,7 +164,7 @@ impl CosmeticFilterCache { if let Some(generic_rule) = rule.hidden_generic_rule() { self.add_generic_filter(generic_rule); } - self.specific_rules.store_rule(rule); + self.store_hostname_rule(rule); } else { self.add_generic_filter(rule); } @@ -142,10 +187,8 @@ impl CosmeticFilterCache { let class = key[1..].to_string(); if key == selector { self.simple_class_rules.insert(class); - } else if let Some(bucket) = self.complex_class_rules.get_mut(&class) { - bucket.push(selector); } else { - self.complex_class_rules.insert(class, vec![selector]); + self.complex_class_rules.insert(class, selector); } } } else if selector.starts_with('#') { @@ -154,10 +197,8 @@ impl CosmeticFilterCache { let id = key[1..].to_string(); if key == selector { self.simple_id_rules.insert(id); - } else if let Some(bucket) = self.complex_id_rules.get_mut(&id) { - bucket.push(selector); } else { - self.complex_id_rules.insert(id, vec![selector]); + self.complex_id_rules.insert(id, selector); } } } else { @@ -165,6 +206,102 @@ impl CosmeticFilterCache { } } + // TODO: review this + fn store_hostname_rule(&mut self, rule: CosmeticFilter) { + use SpecificFilterType::*; + + let unhide = rule.mask.contains(CosmeticFilterMask::UNHIDE); + let script_inject = rule.mask.contains(CosmeticFilterMask::SCRIPT_INJECT); + + let kind = match ( + script_inject, + 
rule.plain_css_selector().map(|s| s.to_string()), + rule.action, + ) { + (false, Some(selector), None) => Hide(selector), + (true, Some(selector), None) => InjectScript((selector, rule.permission)), + (false, selector, action) => ProceduralOrAction( + serde_json::to_string(&ProceduralOrActionFilter { + selector: selector + .map(|selector| vec![CosmeticFilterOperator::CssSelector(selector)]) + .unwrap_or(rule.selector), + action, + }) + .unwrap(), + ), + (true, _, Some(_)) => return, // script injection with action - shouldn't be possible + (true, None, _) => return, // script injection without plain CSS selector - shouldn't be possible + }; + + let kind = if unhide { kind.negated() } else { kind }; + + let tokens_to_insert = std::iter::empty() + .chain(rule.hostnames.unwrap_or_default()) + .chain(rule.entities.unwrap_or_default()); + + tokens_to_insert.for_each(|t| self.store_hostname_filter(&t, kind.clone())); + + let tokens_to_insert_negated = std::iter::empty() + .chain(rule.not_hostnames.unwrap_or_default()) + .chain(rule.not_entities.unwrap_or_default()); + + let negated = kind.negated(); + + tokens_to_insert_negated.for_each(|t| self.store_hostname_filter(&t, negated.clone())); + } + + fn store_hostname_filter(&mut self, token: &Hash, kind: SpecificFilterType) { + use SpecificFilterType::*; + + match kind { + // Handle hide and inject_script at top level for better deduplication + Hide(s) => { + self.hostname_hide.insert(*token, s); + } + InjectScript((s, permission)) => { + let encoded_script = encode_script_with_permission(s, permission); + self.hostname_inject_script.insert(*token, encoded_script); + } + // Handle remaining types through HostnameRule + Unhide(s) => { + let entry = self.specific_rules.entry(*token).or_default(); + entry.unhide.push(s); + } + UninjectScript((s, _)) => { + let entry = self.specific_rules.entry(*token).or_default(); + entry.uninject_script.push(s); + } + ProceduralOrAction(s) => { + let entry = 
self.specific_rules.entry(*token).or_default(); + entry.procedural_action.push(s); + } + ProceduralOrActionException(s) => { + let entry = self.specific_rules.entry(*token).or_default(); + entry.procedural_action_exception.push(s); + } + } + } +} + +impl CosmeticFilterCache { + pub fn from_context(filter_data_context: FilterDataContextRef) -> Self { + Self { + filter_data_context, + } + } + + #[cfg(test)] + pub fn from_rules(rules: Vec) -> Self { + use crate::filters::{ + fb_builder::make_flatbuffer_from_rules, fb_network::FilterDataContext, + }; + + let memory = make_flatbuffer_from_rules(vec![], rules, true, 0); + + let filter_data_context = FilterDataContext::new(memory); + Self::from_context(filter_data_context) + } + /// Generic class/id rules are by far the most common type of cosmetic filtering rule, and they /// apply to all sites. Rather than injecting all of these rules onto every page, which would /// blow up memory usage, we only inject rules based on classes and ids that actually appear on @@ -191,34 +328,40 @@ impl CosmeticFilterCache { ) -> Vec { let mut selectors = vec![]; + let cs = self.filter_data_context.memory.root().cosmetic_filters(); + let simple_class_rules = FlatSetView::new(cs.simple_class_rules()); + let simple_id_rules = FlatSetView::new(cs.simple_id_rules()); + let complex_class_rules = FlatMapStringView::new( + cs.complex_class_rules_index(), + cs.complex_class_rules_values(), + ); + let complex_id_rules = + FlatMapStringView::new(cs.complex_id_rules_index(), cs.complex_id_rules_values()); + classes.into_iter().for_each(|class| { let class = class.as_ref(); - if self.simple_class_rules.contains(class) - && !exceptions.contains(&format!(".{}", class)) - { + if simple_class_rules.contains(class) && !exceptions.contains(&format!(".{}", class)) { selectors.push(format!(".{}", class)); } - if let Some(bucket) = self.complex_class_rules.get(class) { - selectors.extend( - bucket - .iter() - .filter(|sel| !exceptions.contains(*sel)) - 
.map(|s| s.to_owned()), - ); + if let Some(bucket) = complex_class_rules.get(class) { + for (_, sel) in bucket { + if !exceptions.contains(sel) { + selectors.push(sel.to_string()); + } + } } }); ids.into_iter().for_each(|id| { let id = id.as_ref(); - if self.simple_id_rules.contains(id) && !exceptions.contains(&format!("#{}", id)) { + if simple_id_rules.contains(id) && !exceptions.contains(&format!("#{}", id)) { selectors.push(format!("#{}", id)); } - if let Some(bucket) = self.complex_id_rules.get(id) { - selectors.extend( - bucket - .iter() - .filter(|sel| !exceptions.contains(*sel)) - .map(|s| s.to_owned()), - ); + if let Some(bucket) = complex_id_rules.get(id) { + for (_, sel) in bucket { + if !exceptions.contains(sel) { + selectors.push(sel.to_string()); + } + } } }); @@ -258,75 +401,77 @@ impl CosmeticFilterCache { .chain(request_hostnames.iter()) .collect(); - fn populate_set( - hash: &Hash, - source_bin: &HostnameFilterBin, - dest_set: &mut HashSet, - ) { - if let Some(s) = source_bin.get(hash) { - s.iter().for_each(|s| { - dest_set.insert(s.to_owned()); - }); - } - } + let cf = self.filter_data_context.memory.root().cosmetic_filters(); + let hostname_rules_view = FlatMapView::new(cf.hostname_index(), cf.hostname_values()); + let hostname_hide_view = + FlatMultiMapView::new(cf.hostname_hide_index(), cf.hostname_hide_values()); + let hostname_inject_script_view = FlatMultiMapView::new( + cf.hostname_inject_script_index(), + cf.hostname_inject_script_values(), + ); + for hash in hashes.iter() { - populate_set( - hash, - &self.specific_rules.hide, - &mut specific_hide_selectors, - ); - populate_set( - hash, - &self.specific_rules.procedural_action, - &mut procedural_actions, - ); - // special behavior: `script_injections` doesn't have to own the strings yet, since the - // scripts need to be fetched and templated later - if let Some(s) = self.specific_rules.inject_script.get(hash) { - s.iter().for_each(|(s, mask)| { + // Handle top-level hide selectors + if 
let Some(hide_iterator) = hostname_hide_view.get(**hash) { + for (_, hide_selector) in hide_iterator { + if !exceptions.contains(hide_selector) { + specific_hide_selectors.insert(hide_selector.to_owned()); + } + } + } + + // Handle top-level inject scripts with encoded permissions + if let Some(script_iterator) = hostname_inject_script_view.get(**hash) { + for (_, encoded_script) in script_iterator { + let (permission, script) = decode_script_with_permission(encoded_script); script_injections - .entry(s) - .and_modify(|entry| *entry |= *mask) - .or_insert(*mask); - }); + .entry(script) + .and_modify(|entry| *entry |= permission) + .or_insert(permission); + } } - } - fn prune_set( - hash: &Hash, - source_bin: &HostnameFilterBin, - dest_set: &mut HashSet, - ) { - if let Some(s) = source_bin.get(hash) { - s.iter().for_each(|s| { - dest_set.remove(s); - }); + // Handle remaining rule types from HostnameSpecificRules + if let Some(hostname_rules) = hostname_rules_view.get(**hash) { + // Process procedural actions + if let Some(procedural_actions_rules) = hostname_rules.procedural_action() { + for action in procedural_actions_rules.iter() { + procedural_actions.insert(action.to_owned()); + } + } } } + + // Process unhide/exception filters for hash in hashes.iter() { - // special behavior: unhide rules need to go in `exceptions` as well - if let Some(s) = self.specific_rules.unhide.get(hash) { - s.iter().for_each(|s| { - specific_hide_selectors.remove(s); - exceptions.insert(s.to_owned()); - }); - } - prune_set( - hash, - &self.specific_rules.procedural_action_exception, - &mut procedural_actions, - ); - // same logic but not using prune_set since strings are unowned, (see above) - if let Some(s) = self.specific_rules.uninject_script.get(hash) { - for s in s { - if s.is_empty() { - except_all_scripts = true; - script_injections.clear(); + if let Some(hostname_rules) = hostname_rules_view.get(**hash) { + // Process unhide selectors (special behavior: they also go in 
exceptions) + if let Some(unhide_rules) = hostname_rules.unhide() { + for selector in unhide_rules.iter() { + specific_hide_selectors.remove(selector); + exceptions.insert(selector.to_owned()); } - if except_all_scripts { - continue; + } + + // Process procedural action exceptions + if let Some(procedural_exceptions) = hostname_rules.procedural_action_exception() { + for action in procedural_exceptions.iter() { + procedural_actions.remove(action); + } + } + + // Process script uninjects + if let Some(uninject_scripts) = hostname_rules.uninject_script() { + for script in uninject_scripts.iter() { + if script.is_empty() { + except_all_scripts = true; + script_injections.clear(); + } + if except_all_scripts { + continue; + } + script_injections.remove(script); } - script_injections.remove(s.as_str()); } } } @@ -334,11 +479,17 @@ impl CosmeticFilterCache { let hide_selectors = if generichide { specific_hide_selectors } else { - let mut hide_selectors = self - .misc_generic_selectors - .difference(&exceptions) - .cloned() - .collect::>(); + let cs = self.filter_data_context.memory.root().cosmetic_filters(); + let misc_generic_selectors_vector = cs.misc_generic_selectors(); + + // TODO: check performance of this + let mut hide_selectors = HashSet::new(); + for i in 0..misc_generic_selectors_vector.len() { + let selector = misc_generic_selectors_vector.get(i); + if !exceptions.contains(selector) { + hide_selectors.insert(selector.to_string()); + } + } specific_hide_selectors.into_iter().for_each(|sel| { hide_selectors.insert(sel); }); @@ -357,69 +508,48 @@ impl CosmeticFilterCache { } } -/// Each hostname-specific filter can be pointed to by several different hostnames, and each -/// hostname can correspond to several different filters. To effectively store and access those -/// filters by hostname, all the non-hostname information for filters is stored in per-hostname -/// "buckets" within a Vec, and each bucket is identified by its index. 
Hostname hashes are used as -/// keys to get the indices of relevant buckets, which are in turn used to retrieve all the filters -/// that apply. -#[derive(Default)] -pub(crate) struct HostnameFilterBin(pub HashMap>); - -impl HostnameFilterBin { - pub fn insert(&mut self, token: &Hash, filter: T) { - if let Some(bucket) = self.0.get_mut(token) { - bucket.push(filter); - } else { - self.0.insert(*token, vec![filter]); - } - } - - fn get(&self, token: &Hash) -> Option<&Vec> { - self.0.get(token) +impl<'a, B: Builder<'a>> FlatSerialize<'a, B> for CosmeticFilterCacheBuilder { + type Output = WIPOffset>; + fn serialize(value: Self, builder: &mut B) -> WIPOffset> { + let complex_class_rules = FlatMultiMapBuilder::finish(value.complex_class_rules, builder); + let complex_id_rules = FlatMultiMapBuilder::finish(value.complex_id_rules, builder); + + // Handle top-level hostname hide and inject_script for better deduplication + let hostname_hide = FlatMultiMapBuilder::finish(value.hostname_hide, builder); + let hostname_inject_script = + FlatMultiMapBuilder::finish(value.hostname_inject_script, builder); + + // Handle remaining rule types through HostnameSpecificRules + let hostname_specific_rules = FlatMapBuilder::finish(value.specific_rules, builder); + + let simple_class_rules = Some(FlatSerialize::serialize(value.simple_class_rules, builder)); + let simple_id_rules = Some(FlatSerialize::serialize(value.simple_id_rules, builder)); + let misc_generic_selectors = Some(FlatSerialize::serialize( + value.misc_generic_selectors, + builder, + )); + + fb::CosmeticFilters::create( + builder.raw_builder(), + &fb::CosmeticFiltersArgs { + simple_class_rules, + simple_id_rules, + misc_generic_selectors, + complex_class_rules_index: Some(complex_class_rules.keys), + complex_class_rules_values: Some(complex_class_rules.values), + complex_id_rules_index: Some(complex_id_rules.keys), + complex_id_rules_values: Some(complex_id_rules.values), + hostname_hide_index: Some(hostname_hide.keys), 
+ hostname_hide_values: Some(hostname_hide.values), + hostname_inject_script_index: Some(hostname_inject_script.keys), + hostname_inject_script_values: Some(hostname_inject_script.values), + hostname_index: Some(hostname_specific_rules.keys), + hostname_values: Some(hostname_specific_rules.values), + }, + ) } } -impl HostnameFilterBin { - /// Convenience method that serializes to JSON - pub fn insert_procedural_action_filter(&mut self, token: &Hash, f: &ProceduralOrActionFilter) { - self.insert(token, serde_json::to_string(f).unwrap()); - } -} - -/// Holds filter bins categorized by filter type. -#[derive(Default)] -pub(crate) struct HostnameRuleDb { - /// Simple hostname-specific hide rules, e.g. `example.com##.ad`. - /// - /// The parameter is the rule's CSS selector. - pub hide: HostnameFilterBin, - /// Simple hostname-specific hide exception rules, e.g. `example.com#@#.ad`. - /// - /// The parameter is the rule's CSS selector. - pub unhide: HostnameFilterBin, - /// Hostname-specific rules with a scriptlet to inject along with any arguments, e.g. - /// `example.com##+js(acis, Number.isNan)`. - /// - /// The parameter is the contents of the `+js(...)` syntax construct. - pub inject_script: HostnameFilterBin<(String, PermissionMask)>, - /// Hostname-specific rules to except a scriptlet to inject along with any arguments, e.g. - /// `example.com#@#+js(acis, Number.isNan)`. - /// - /// The parameter is the contents of the `+js(...)` syntax construct. - /// - /// In practice, these rules are extremely rare in filter lists. - pub uninject_script: HostnameFilterBin, - /// Procedural filters and/or filters with a [`CosmeticFilterAction`]. - /// - /// Each is a [`ProceduralOrActionFilter`] struct serialized as JSON. - pub procedural_action: HostnameFilterBin, - /// Exceptions for procedural filters and/or filters with a [`CosmeticFilterAction`]. - /// - /// Each is a [`ProceduralOrActionFilter`] struct serialized as JSON. 
- pub procedural_action_exception: HostnameFilterBin, -} - /// Representations of filters with complex behavior that relies on in-page JS logic. /// /// These get stored in-memory as JSON and should be deserialized/acted on by a content script. @@ -453,6 +583,7 @@ impl ProceduralOrActionFilter { } /// Convenience constructor for pure CSS style filters. + #[cfg(test)] pub(crate) fn from_css(selector: String, style: String) -> Self { Self { selector: vec![CosmeticFilterOperator::CssSelector(selector)], @@ -461,64 +592,6 @@ impl ProceduralOrActionFilter { } } -impl HostnameRuleDb { - pub fn store_rule(&mut self, rule: CosmeticFilter) { - use SpecificFilterType::*; - - let unhide = rule.mask.contains(CosmeticFilterMask::UNHIDE); - let script_inject = rule.mask.contains(CosmeticFilterMask::SCRIPT_INJECT); - - let kind = match ( - script_inject, - rule.plain_css_selector().map(|s| s.to_string()), - rule.action, - ) { - (false, Some(selector), None) => Hide(selector), - (true, Some(selector), None) => InjectScript((selector, rule.permission)), - (false, selector, action) => ProceduralOrAction( - serde_json::to_string(&ProceduralOrActionFilter { - selector: selector - .map(|selector| vec![CosmeticFilterOperator::CssSelector(selector)]) - .unwrap_or(rule.selector), - action, - }) - .unwrap(), - ), - (true, _, Some(_)) => return, // script injection with action - shouldn't be possible - (true, None, _) => return, // script injection without plain CSS selector - shouldn't be possible - }; - - let kind = if unhide { kind.negated() } else { kind }; - - let tokens_to_insert = std::iter::empty() - .chain(rule.hostnames.unwrap_or_default()) - .chain(rule.entities.unwrap_or_default()); - - tokens_to_insert.for_each(|t| self.store(&t, kind.clone())); - - let tokens_to_insert_negated = std::iter::empty() - .chain(rule.not_hostnames.unwrap_or_default()) - .chain(rule.not_entities.unwrap_or_default()); - - let negated = kind.negated(); - - tokens_to_insert_negated.for_each(|t| 
self.store(&t, negated.clone())); - } - - fn store(&mut self, token: &Hash, kind: SpecificFilterType) { - use SpecificFilterType::*; - - match kind { - Hide(s) => self.hide.insert(token, s), - Unhide(s) => self.unhide.insert(token, s), - InjectScript(s) => self.inject_script.insert(token, s), - UninjectScript((s, _)) => self.uninject_script.insert(token, s), - ProceduralOrAction(s) => self.procedural_action.insert(token, s), - ProceduralOrActionException(s) => self.procedural_action_exception.insert(token, s), - } - } -} - /// Exists to use common logic for binning filters correctly #[derive(Clone)] enum SpecificFilterType { diff --git a/src/data_format/mod.rs b/src/data_format/mod.rs deleted file mode 100644 index 0f8f5048..00000000 --- a/src/data_format/mod.rs +++ /dev/null @@ -1,111 +0,0 @@ -//! Allows serialization of the adblock engine into a compact binary format, as well as subsequent -//! rapid deserialization back into an engine. -//! -//! In order to support multiple format versions simultaneously, this module wraps around different -//! serialization/deserialization implementations and can automatically dispatch to the appropriate -//! one. - -mod storage; - -pub(crate) mod utils; - -use crate::cosmetic_filter_cache::CosmeticFilterCache; -use crate::flatbuffers::unsafe_tools::VerifiedFlatbufferMemory; -use crate::network_filter_list::NetworkFilterListParsingError; - -/// Newer formats start with this magic byte sequence. -/// Calculated as the leading 4 bytes of `echo -n 'brave/adblock-rust' | sha512sum`. 
-const ADBLOCK_RUST_DAT_MAGIC: [u8; 4] = [0xd1, 0xd9, 0x3a, 0xaf]; -const ADBLOCK_RUST_DAT_VERSION: u8 = 1; - -#[derive(Debug)] -pub enum SerializationError { - RmpSerdeError(rmp_serde::encode::Error), -} - -impl From for SerializationError { - fn from(e: rmp_serde::encode::Error) -> Self { - Self::RmpSerdeError(e) - } -} - -#[derive(Debug)] -pub enum DeserializationError { - RmpSerdeError(rmp_serde::decode::Error), - UnsupportedFormatVersion(u8), - NoHeaderFound, - FlatBufferParsingError(flatbuffers::InvalidFlatbuffer), - ValidationError, -} - -impl From for DeserializationError { - fn from(x: std::convert::Infallible) -> Self { - match x {} - } -} - -impl From for DeserializationError { - fn from(e: rmp_serde::decode::Error) -> Self { - Self::RmpSerdeError(e) - } -} - -impl From for DeserializationError { - fn from(e: NetworkFilterListParsingError) -> Self { - match e { - NetworkFilterListParsingError::InvalidFlatbuffer(invalid_flatbuffer) => { - Self::FlatBufferParsingError(invalid_flatbuffer) - } - NetworkFilterListParsingError::UniqueDomainsOutOfBounds(_) => Self::ValidationError, - } - } -} - -pub(crate) fn serialize_engine( - flatbuffer_memory: &VerifiedFlatbufferMemory, - cfc: &CosmeticFilterCache, -) -> Result, SerializationError> { - let serialize_format = storage::SerializeFormat::from((flatbuffer_memory, cfc)); - serialize_format.serialize() -} - -pub(crate) fn deserialize_engine( - serialized: &[u8], -) -> Result<(VerifiedFlatbufferMemory, CosmeticFilterCache), DeserializationError> { - let deserialize_format = storage::DeserializeFormat::deserialize(serialized)?; - deserialize_format.try_into() -} - -// Verify the header (MAGIC + VERSION) and return the data after the header. 
-pub fn parse_dat_header(serialized: &[u8]) -> Result<&[u8], DeserializationError> { - if !serialized.starts_with(&ADBLOCK_RUST_DAT_MAGIC) { - return Err(DeserializationError::NoHeaderFound); - } - if serialized.len() < ADBLOCK_RUST_DAT_MAGIC.len() + 1 { - return Err(DeserializationError::NoHeaderFound); - } - let version = serialized[ADBLOCK_RUST_DAT_MAGIC.len()]; - if version != ADBLOCK_RUST_DAT_VERSION { - return Err(DeserializationError::UnsupportedFormatVersion(version)); - } - - Ok(&serialized[ADBLOCK_RUST_DAT_MAGIC.len() + 1..]) -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn validate_magic_bytes() { - use sha2::Digest; - - let mut hasher = sha2::Sha512::new(); - - hasher.update("brave/adblock-rust"); - - let result = hasher.finalize(); - - assert!(result.starts_with(&ADBLOCK_RUST_DAT_MAGIC)); - } -} diff --git a/src/data_format/storage.rs b/src/data_format/storage.rs deleted file mode 100644 index 140d2bfc..00000000 --- a/src/data_format/storage.rs +++ /dev/null @@ -1,308 +0,0 @@ -//! Contains representations of data from the adblocking engine in a -//! forwards-and-backwards-compatible format, as well as utilities for converting these to and from -//! the actual `Engine` components. -//! -//! Any new fields should be added to the _end_ of both `SerializeFormat` and `DeserializeFormat`. - -use std::collections::{HashMap, HashSet}; - -use rmp_serde as rmps; -use serde::{Deserialize, Serialize}; - -use crate::cosmetic_filter_cache::{CosmeticFilterCache, HostnameRuleDb, ProceduralOrActionFilter}; -use crate::flatbuffers::unsafe_tools::VerifiedFlatbufferMemory; -use crate::utils::Hash; - -use super::utils::{stabilize_hashmap_serialization, stabilize_hashset_serialization}; -use super::{DeserializationError, SerializationError}; - -/// Each variant describes a single rule that is specific to a particular hostname. 
-#[derive(Clone, Debug, Deserialize, Serialize)] -enum LegacySpecificFilterType { - Hide(String), - Unhide(String), - Style(String, String), - UnhideStyle(String, String), - ScriptInject(String), - UnhideScriptInject(String), -} - -#[derive(Deserialize, Serialize, Default)] -pub(crate) struct LegacyHostnameRuleDb { - #[serde(serialize_with = "stabilize_hashmap_serialization")] - db: HashMap>, -} - -impl From<&HostnameRuleDb> for LegacyHostnameRuleDb { - fn from(v: &HostnameRuleDb) -> Self { - let mut db = HashMap::>::new(); - for (hash, bin) in v.hide.0.iter() { - for f in bin { - db.entry(*hash) - .and_modify(|v| v.push(LegacySpecificFilterType::Hide(f.to_owned()))) - .or_insert_with(|| vec![LegacySpecificFilterType::Hide(f.to_owned())]); - } - } - for (hash, bin) in v.unhide.0.iter() { - for f in bin { - db.entry(*hash) - .and_modify(|v| v.push(LegacySpecificFilterType::Unhide(f.to_owned()))) - .or_insert_with(|| vec![LegacySpecificFilterType::Unhide(f.to_owned())]); - } - } - for (hash, bin) in v.inject_script.0.iter() { - for (f, _mask) in bin { - db.entry(*hash) - .and_modify(|v| v.push(LegacySpecificFilterType::ScriptInject(f.to_owned()))) - .or_insert_with(|| vec![LegacySpecificFilterType::ScriptInject(f.to_owned())]); - } - } - for (hash, bin) in v.uninject_script.0.iter() { - for f in bin { - db.entry(*hash) - .and_modify(|v| { - v.push(LegacySpecificFilterType::UnhideScriptInject(f.to_owned())) - }) - .or_insert_with(|| { - vec![LegacySpecificFilterType::UnhideScriptInject(f.to_owned())] - }); - } - } - for (hash, bin) in v.procedural_action.0.iter() { - for f in bin { - if let Ok(f) = serde_json::from_str::(f) { - if let Some((selector, style)) = f.as_css() { - db.entry(*hash) - .and_modify(|v| { - v.push(LegacySpecificFilterType::Style( - selector.clone(), - style.clone(), - )) - }) - .or_insert_with(|| { - vec![LegacySpecificFilterType::Style(selector, style)] - }); - } - } - } - } - for (hash, bin) in v.procedural_action_exception.0.iter() { - for f 
in bin { - if let Ok(f) = serde_json::from_str::(f) { - if let Some((selector, style)) = f.as_css() { - db.entry(*hash) - .and_modify(|v| { - v.push(LegacySpecificFilterType::UnhideStyle( - selector.to_owned(), - style.to_owned(), - )) - }) - .or_insert_with(|| { - vec![LegacySpecificFilterType::UnhideStyle( - selector.to_owned(), - style.to_owned(), - )] - }); - } - } - } - } - LegacyHostnameRuleDb { db } - } -} - -impl From for HostnameRuleDb { - fn from(val: LegacyHostnameRuleDb) -> Self { - use crate::cosmetic_filter_cache::HostnameFilterBin; - - let mut hide = HostnameFilterBin::default(); - let mut unhide = HostnameFilterBin::default(); - let mut procedural_action = HostnameFilterBin::default(); - let mut procedural_action_exception = HostnameFilterBin::default(); - let mut inject_script = HostnameFilterBin::default(); - let mut uninject_script = HostnameFilterBin::default(); - - for (hash, bin) in val.db.into_iter() { - for rule in bin.into_iter() { - match rule { - LegacySpecificFilterType::Hide(s) => hide.insert(&hash, s), - LegacySpecificFilterType::Unhide(s) => unhide.insert(&hash, s), - LegacySpecificFilterType::Style(s, st) => procedural_action - .insert_procedural_action_filter( - &hash, - &ProceduralOrActionFilter::from_css(s, st), - ), - LegacySpecificFilterType::UnhideStyle(s, st) => procedural_action_exception - .insert_procedural_action_filter( - &hash, - &ProceduralOrActionFilter::from_css(s, st), - ), - LegacySpecificFilterType::ScriptInject(s) => { - inject_script.insert(&hash, (s, Default::default())) - } - LegacySpecificFilterType::UnhideScriptInject(s) => { - uninject_script.insert(&hash, s) - } - } - } - } - HostnameRuleDb { - hide, - unhide, - inject_script, - uninject_script, - procedural_action, - procedural_action_exception, - } - } -} - -#[derive(Serialize, Deserialize, Debug, PartialEq, Clone)] -pub(crate) struct LegacyRedirectResource { - pub content_type: String, - pub data: String, -} - -#[derive(Serialize, Deserialize, Debug, 
PartialEq, Default)] -pub(crate) struct LegacyRedirectResourceStorage { - #[serde(serialize_with = "stabilize_hashmap_serialization")] - pub resources: HashMap, -} - -#[derive(Clone, Deserialize, Serialize)] -pub(crate) struct LegacyScriptletResource { - scriptlet: String, -} - -#[derive(Default, Deserialize, Serialize)] -pub(crate) struct LegacyScriptletResourceStorage { - #[serde(serialize_with = "stabilize_hashmap_serialization")] - resources: HashMap, -} - -/// Provides structural aggregration of referenced adblock engine data to allow for allocation-free -/// serialization. -#[derive(Serialize)] -pub(crate) struct SerializeFormat<'a> { - flatbuffer_memory: Vec, - - resources: LegacyRedirectResourceStorage, - - #[serde(serialize_with = "stabilize_hashset_serialization")] - simple_class_rules: &'a HashSet, - #[serde(serialize_with = "stabilize_hashset_serialization")] - simple_id_rules: &'a HashSet, - #[serde(serialize_with = "stabilize_hashmap_serialization")] - complex_class_rules: &'a HashMap>, - #[serde(serialize_with = "stabilize_hashmap_serialization")] - complex_id_rules: &'a HashMap>, - - specific_rules: LegacyHostnameRuleDb, - - #[serde(serialize_with = "stabilize_hashset_serialization")] - misc_generic_selectors: &'a HashSet, - - scriptlets: LegacyScriptletResourceStorage, - - #[serde(serialize_with = "stabilize_hashmap_serialization")] - procedural_action: &'a HashMap>, - #[serde(serialize_with = "stabilize_hashmap_serialization")] - procedural_action_exception: &'a HashMap>, -} - -impl SerializeFormat<'_> { - pub fn serialize(&self) -> Result, SerializationError> { - let mut output = super::ADBLOCK_RUST_DAT_MAGIC.to_vec(); - output.push(super::ADBLOCK_RUST_DAT_VERSION); - rmps::encode::write(&mut output, &self)?; - Ok(output) - } -} - -/// Structural representation of adblock engine data that can be built up from deserialization and -/// used directly to construct new `Engine` components without unnecessary allocation. 
-#[derive(Deserialize)] -pub(crate) struct DeserializeFormat { - flatbuffer_memory: Vec, - - _resources: LegacyRedirectResourceStorage, - - simple_class_rules: HashSet, - simple_id_rules: HashSet, - complex_class_rules: HashMap>, - complex_id_rules: HashMap>, - - specific_rules: LegacyHostnameRuleDb, - - misc_generic_selectors: HashSet, - - _scriptlets: LegacyScriptletResourceStorage, - - #[serde(default)] - procedural_action: HashMap>, - #[serde(default)] - procedural_action_exception: HashMap>, -} - -impl DeserializeFormat { - pub fn deserialize(serialized: &[u8]) -> Result { - let data = super::parse_dat_header(serialized)?; - let format: Self = rmps::decode::from_read(data)?; - Ok(format) - } -} - -impl<'a> From<(&'a VerifiedFlatbufferMemory, &'a CosmeticFilterCache)> for SerializeFormat<'a> { - fn from(v: (&'a VerifiedFlatbufferMemory, &'a CosmeticFilterCache)) -> Self { - let (memory, cfc) = v; - Self { - flatbuffer_memory: memory.data().to_vec(), - - resources: LegacyRedirectResourceStorage::default(), - - simple_class_rules: &cfc.simple_class_rules, - simple_id_rules: &cfc.simple_id_rules, - complex_class_rules: &cfc.complex_class_rules, - complex_id_rules: &cfc.complex_id_rules, - - specific_rules: (&cfc.specific_rules).into(), - - misc_generic_selectors: &cfc.misc_generic_selectors, - - scriptlets: LegacyScriptletResourceStorage::default(), - - procedural_action: &cfc.specific_rules.procedural_action.0, - procedural_action_exception: &cfc.specific_rules.procedural_action_exception.0, - } - } -} - -impl TryFrom for (VerifiedFlatbufferMemory, CosmeticFilterCache) { - fn try_from(v: DeserializeFormat) -> Result { - use crate::cosmetic_filter_cache::HostnameFilterBin; - - let mut specific_rules: HostnameRuleDb = v.specific_rules.into(); - specific_rules.procedural_action = HostnameFilterBin(v.procedural_action); - specific_rules.procedural_action_exception = - HostnameFilterBin(v.procedural_action_exception); - - let memory = 
VerifiedFlatbufferMemory::from_raw(v.flatbuffer_memory) - .map_err(DeserializationError::FlatBufferParsingError)?; - - Ok(( - memory, - CosmeticFilterCache { - simple_class_rules: v.simple_class_rules, - simple_id_rules: v.simple_id_rules, - complex_class_rules: v.complex_class_rules, - complex_id_rules: v.complex_id_rules, - - specific_rules, - - misc_generic_selectors: v.misc_generic_selectors, - }, - )) - } - - type Error = DeserializationError; -} diff --git a/src/data_format/utils.rs b/src/data_format/utils.rs deleted file mode 100644 index 3b3b3e81..00000000 --- a/src/data_format/utils.rs +++ /dev/null @@ -1,32 +0,0 @@ -//! Common utilities associated with serialization and deserialization of the `Engine` data into -//! binary formats. - -use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet}; - -use serde::{Serialize, Serializer}; - -/// Forces a `HashSet` to be serialized with a stable ordering by temporarily representing it as a -/// `BTreeSet`. -pub fn stabilize_hashset_serialization(set: &HashSet, s: S) -> Result -where - S: Serializer, - V: Ord + serde::Serialize, -{ - let stabilized: BTreeSet<&V> = set.iter().collect(); - stabilized.serialize(s) -} - -/// Forces a `HashMap` to be serialized with a stable ordering by temporarily representing it as a -/// `BTreeMap`. 
-pub fn stabilize_hashmap_serialization( - set: &HashMap, - s: S, -) -> Result -where - S: Serializer, - K: Ord + Serialize, - V: Serialize, -{ - let stabilized: BTreeMap<&K, &V> = set.iter().collect(); - stabilized.serialize(s) -} diff --git a/src/engine.rs b/src/engine.rs index 2d12cfec..b2020bbe 100644 --- a/src/engine.rs +++ b/src/engine.rs @@ -2,8 +2,9 @@ use crate::blocker::{Blocker, BlockerResult}; use crate::cosmetic_filter_cache::{CosmeticFilterCache, UrlSpecificResources}; -use crate::filters::fb_builder::FlatBufferBuilder; +use crate::filters::fb_builder::make_flatbuffer_from_rules; use crate::filters::fb_network::{FilterDataContext, FilterDataContextRef}; +use crate::flatbuffers::unsafe_tools::VerifiedFlatbufferMemory; use crate::lists::{FilterSet, ParseOptions}; use crate::regex_manager::RegexManagerDiscardPolicy; use crate::request::Request; @@ -51,13 +52,24 @@ pub struct Engine { filter_data_context: FilterDataContextRef, } +const ADBLOCK_FLATBUFFER_VERSION: u32 = 1; + +#[derive(Debug)] +pub enum DeserializationError { + VersionMismatch(u32), + FlatBufferParsingError(flatbuffers::InvalidFlatbuffer), + ValidationError, +} + impl Default for Engine { fn default() -> Self { let filter_data_context = FilterDataContextRef::new(Default::default()); Self { blocker: Blocker::from_context(FilterDataContextRef::clone(&filter_data_context)), - cosmetic_cache: CosmeticFilterCache::new(), + cosmetic_cache: CosmeticFilterCache::from_context(FilterDataContextRef::clone( + &filter_data_context, + )), resources: ResourceStorage::default(), filter_data_context, } @@ -103,13 +115,20 @@ impl Engine { .. 
} = set; - let memory = FlatBufferBuilder::make_flatbuffer(network_filters, optimize); + let memory = make_flatbuffer_from_rules( + network_filters, + cosmetic_filters, + optimize, + ADBLOCK_FLATBUFFER_VERSION, + ); let filter_data_context = FilterDataContext::new(memory); Self { blocker: Blocker::from_context(FilterDataContextRef::clone(&filter_data_context)), - cosmetic_cache: CosmeticFilterCache::from_rules(cosmetic_filters), + cosmetic_cache: CosmeticFilterCache::from_context(FilterDataContextRef::clone( + &filter_data_context, + )), resources: ResourceStorage::default(), filter_data_context, } @@ -240,8 +259,8 @@ impl Engine { } /// Serializes the `Engine` into a binary format so that it can be quickly reloaded later. - pub fn serialize(&self) -> Result, crate::data_format::SerializationError> { - crate::data_format::serialize_engine(&self.filter_data_context.memory, &self.cosmetic_cache) + pub fn serialize(&self) -> &[u8] { + self.filter_data_context.memory.data() } /// Deserialize the `Engine` from the binary format generated by `Engine::serialize`. @@ -249,18 +268,24 @@ impl Engine { /// Note that the binary format has a built-in version number that may be incremented. There is /// no guarantee that later versions of the format will be deserializable across minor versions /// of adblock-rust; the format is provided only as a caching optimization. 
- pub fn deserialize( - &mut self, - serialized: &[u8], - ) -> Result<(), crate::data_format::DeserializationError> { + pub fn deserialize(&mut self, serialized: &[u8]) -> Result<(), DeserializationError> { let current_tags = self.blocker.tags_enabled(); - let (memory, cosmetic_cache) = crate::data_format::deserialize_engine(serialized)?; - self.filter_data_context = FilterDataContext::new(memory); + let memory = VerifiedFlatbufferMemory::from_raw(serialized.to_vec()) + .map_err(DeserializationError::FlatBufferParsingError)?; + if memory.root().version() != ADBLOCK_FLATBUFFER_VERSION { + return Err(DeserializationError::VersionMismatch( + memory.root().version(), + )); + } + let context = FilterDataContext::new(memory); + self.filter_data_context = context; self.blocker = Blocker::from_context(FilterDataContextRef::clone(&self.filter_data_context)); self.blocker .use_tags(&current_tags.iter().map(|s| &**s).collect::<Vec<_>>()); - self.cosmetic_cache = cosmetic_cache; + self.cosmetic_cache = CosmeticFilterCache::from_context(FilterDataContextRef::clone( + &self.filter_data_context, + )); Ok(()) } } diff --git a/src/filters/fb_builder.rs b/src/filters/fb_builder.rs index d83833bc..929eef66 100644 --- a/src/filters/fb_builder.rs +++ b/src/filters/fb_builder.rs @@ -6,9 +6,13 @@ use std::collections::{HashMap, HashSet}; use std::vec; -use flatbuffers::WIPOffset; +use flatbuffers::{ForwardsUOffset, Vector, WIPOffset}; +use crate::cosmetic_filter_cache::CosmeticFilterCacheBuilder; +use crate::filters::cosmetic::CosmeticFilter; use crate::filters::network::{NetworkFilter, NetworkFilterMaskHelper}; +use crate::flatbuffers::containers::flat_multimap::FlatMultiMapBuilder; +use crate::flatbuffers::containers::flat_serialize::{Builder, FlatSerialize}; use crate::flatbuffers::unsafe_tools::VerifiedFlatbufferMemory; use crate::network_filter_list::token_histogram; use crate::optimizer; @@ -29,65 +33,83 @@ pub(crate) enum NetworkFilterListId { } #[derive(Default, Clone)] -struct
FilterListBuilder { +struct NetworkFilterListBuilder { filters: Vec, + optimize: bool, } -pub(crate) struct FlatBufferBuilder { - lists: Vec, - - unique_domains_hashes: Vec, - unique_domains_hashes_map: HashMap, - index: u32, +struct EngineFlatBuilder<'a> { + fb_builder: flatbuffers::FlatBufferBuilder<'a>, + unique_domains_hashes: HashMap, + unique_domains_hashes_vec: Vec, } -impl FlatBufferBuilder { - pub fn new(list_count: usize) -> Self { +impl Default for EngineFlatBuilder<'_> { + fn default() -> Self { Self { - lists: vec![FilterListBuilder::default(); list_count], - unique_domains_hashes: vec![], - unique_domains_hashes_map: HashMap::new(), - index: 0, + fb_builder: flatbuffers::FlatBufferBuilder::new(), + unique_domains_hashes: HashMap::new(), + unique_domains_hashes_vec: Vec::new(), } } +} - fn get_or_insert_unique_domain_hash(&mut self, h: &Hash) -> u32 { - if let Some(&index) = self.unique_domains_hashes_map.get(h) { +impl<'a> EngineFlatBuilder<'a> { + pub fn get_or_insert_unique_domain_hash(&mut self, hash: &Hash) -> u32 { + if let Some(&index) = self.unique_domains_hashes.get(hash) { return index; } - let index = self.unique_domains_hashes.len() as u32; - self.unique_domains_hashes.push(*h); - self.unique_domains_hashes_map.insert(*h, index); + let index = self.unique_domains_hashes_vec.len() as u32; + self.unique_domains_hashes_vec.push(*hash); + self.unique_domains_hashes.insert(*hash, index); index } - pub fn add_filter(&mut self, network_filter: NetworkFilter, list_id: u32) { - self.lists[list_id as usize].filters.push(network_filter); + pub fn write_unique_domains(&mut self) -> WIPOffset> { + self.fb_builder + .create_vector(&self.unique_domains_hashes_vec) + } +} + +impl<'a> Builder<'a> for EngineFlatBuilder<'a> { + fn create_string(&mut self, s: &str) -> WIPOffset<&'a str> { + self.fb_builder.create_string(s) } - fn write_filter<'a>( - &mut self, - builder: &mut flatbuffers::FlatBufferBuilder<'a>, + fn raw_builder(&mut self) -> &mut 
flatbuffers::FlatBufferBuilder<'a> { + &mut self.fb_builder + } +} + +struct NetworkRulesBuilder { + lists: Vec, +} + +impl<'a> FlatSerialize<'a, EngineFlatBuilder<'a>> for &NetworkFilter { + type Output = WIPOffset>; + + fn serialize( network_filter: &NetworkFilter, + builder: &mut EngineFlatBuilder<'a>, ) -> WIPOffset> { let opt_domains = network_filter.opt_domains.as_ref().map(|v| { let mut o: Vec = v .iter() - .map(|x| self.get_or_insert_unique_domain_hash(x)) + .map(|x| builder.get_or_insert_unique_domain_hash(x)) .collect(); o.sort_unstable(); o.dedup(); - builder.create_vector(&o) + FlatSerialize::serialize(o, builder) }); let opt_not_domains = network_filter.opt_not_domains.as_ref().map(|v| { let mut o: Vec = v .iter() - .map(|x| self.get_or_insert_unique_domain_hash(x)) + .map(|x| builder.get_or_insert_unique_domain_hash(x)) .collect(); o.sort_unstable(); o.dedup(); - builder.create_vector(&o) + FlatSerialize::serialize(o, builder) }); let modifier_option = network_filter @@ -111,7 +133,7 @@ impl FlatBufferBuilder { .iter() .map(|s| builder.create_string(s)) .collect(); - Some(builder.create_vector(&offsets)) + Some(FlatSerialize::serialize(offsets, builder)) } else { None }; @@ -121,8 +143,8 @@ impl FlatBufferBuilder { .as_ref() .map(|v| builder.create_string(v.as_str())); - let filter = fb::NetworkFilter::create( - builder, + let network_filter = fb::NetworkFilter::create( + &mut builder.fb_builder, &fb::NetworkFilterArgs { mask: network_filter.mask.bits(), patterns, @@ -135,53 +157,32 @@ impl FlatBufferBuilder { }, ); - self.index += 1; - - filter + network_filter } +} - pub fn finish(&mut self, optimize: bool) -> VerifiedFlatbufferMemory { - let mut builder = flatbuffers::FlatBufferBuilder::new(); - let mut flat_network_rules = vec![]; - - let lists = std::mem::take(&mut self.lists); - for (list_id, list) in lists.into_iter().enumerate() { - // Don't optimize removeparam, since it can fuse filters without respecting distinct - let optimize = optimize 
&& list_id != NetworkFilterListId::RemoveParam as usize; - - flat_network_rules.push(self.write_filter_list(&mut builder, list.filters, optimize)); +impl NetworkFilterListBuilder { + fn new(optimize: bool) -> Self { + Self { + filters: vec![], + optimize, } - - // Create vectors first to avoid simultaneous mutable borrows of `builder`. - let network_rules = builder.create_vector(&flat_network_rules); - let unique_vec = builder.create_vector(&self.unique_domains_hashes); - - let root = fb::Engine::create( - &mut builder, - &fb::EngineArgs { - network_rules: Some(network_rules), - unique_domains_hashes: Some(unique_vec), - }, - ); - - builder.finish(root, None); - - // TODO: consider using builder.collapse() to avoid reallocating memory. - VerifiedFlatbufferMemory::from_builder(&builder) } +} - pub fn write_filter_list<'a>( - &mut self, - builder: &mut flatbuffers::FlatBufferBuilder<'a>, - filters: Vec, - optimize: bool, +impl<'a> FlatSerialize<'a, EngineFlatBuilder<'a>> for NetworkFilterListBuilder { + type Output = WIPOffset>; + fn serialize( + value: Self, + builder: &mut EngineFlatBuilder<'a>, ) -> WIPOffset> { let mut filter_map = HashMap::>>>::new(); let mut optimizable = HashMap::>::new(); // Compute tokens for all filters - let filter_tokens: Vec<_> = filters + let filter_tokens: Vec<_> = value + .filters .into_iter() .map(|filter| { let tokens = filter.get_tokens(); @@ -193,11 +194,11 @@ impl FlatBufferBuilder { let (total_number_of_tokens, tokens_histogram) = token_histogram(&filter_tokens); { - for (network_filter, multi_tokens) in filter_tokens { - let flat_filter = if !optimize + for (network_filter, multi_tokens) in filter_tokens.into_iter() { + let flat_filter = if !value.optimize || !optimizer::is_filter_optimizable_by_patterns(&network_filter) { - Some(self.write_filter(builder, &network_filter)) + Some(FlatSerialize::serialize(&network_filter, builder)) } else { None }; @@ -232,7 +233,7 @@ impl FlatBufferBuilder { } } - if optimize { + if 
value.optimize { // Sort the entries to ensure deterministic iteration order let mut optimizable_entries: Vec<_> = optimizable.drain().collect(); optimizable_entries.sort_unstable_by_key(|(token, _)| *token); @@ -241,7 +242,7 @@ impl FlatBufferBuilder { let optimized = optimizer::optimize(v); for filter in optimized { - let flat_filter = self.write_filter(builder, &filter); + let flat_filter = FlatSerialize::serialize(&filter, builder); filter_map.entry(token).or_default().push(flat_filter); } } @@ -252,49 +253,35 @@ impl FlatBufferBuilder { ); } - let len = filter_map.len(); - - // Convert filter_map keys to a sorted vector of (hash, filter_indices). - let mut entries: Vec<_> = filter_map.drain().collect(); - entries.sort_unstable_by_key(|(k, _)| *k); - - // Convert sorted_entries to two flatbuffers vectors. - let mut flat_index: Vec = Vec::with_capacity(len); - let mut flat_values: Vec<_> = Vec::with_capacity(len); - for (key, filter_indices) in entries { - for &filter_index in &filter_indices { - flat_index.push(key); - flat_values.push(filter_index); - } - } - - let filter_map_index = builder.create_vector(&flat_index); - let filter_map_values = builder.create_vector(&flat_values); + let flat_filter_map_builder = FlatMultiMapBuilder::from_filter_map(filter_map); + let flat_filter_map = FlatMultiMapBuilder::finish(flat_filter_map_builder, builder); fb::NetworkFilterList::create( - builder, + builder.raw_builder(), &fb::NetworkFilterListArgs { - filter_map_index: Some(filter_map_index), - filter_map_values: Some(filter_map_values), + filter_map_index: Some(flat_filter_map.keys), + filter_map_values: Some(flat_filter_map.values), }, ) } +} - pub fn make_flatbuffer( - network_filters: Vec, - optimize: bool, - ) -> VerifiedFlatbufferMemory { - type FilterId = NetworkFilterListId; - let mut builder = FlatBufferBuilder::new(FilterId::Size as usize); +impl NetworkRulesBuilder { + pub fn from_rules(network_filters: Vec, optimize: bool) -> Self { + let mut lists = 
vec![]; + for list_id in 0..NetworkFilterListId::Size as usize { + // Don't optimize removeparam, since it can fuse filters without respecting distinct + let optimize = optimize && list_id != NetworkFilterListId::RemoveParam as usize; + lists.push(NetworkFilterListBuilder::new(optimize)); + } + let mut self_ = Self { lists }; let mut badfilter_ids: HashSet = HashSet::new(); - for filter in network_filters.iter() { + for filter in network_filters.into_iter() { if filter.is_badfilter() { badfilter_ids.insert(filter.get_id_without_badfilter()); } - } - for filter in network_filters.into_iter() { - // skip any bad filters + let filter_id = filter.get_id(); if badfilter_ids.contains(&filter_id) || filter.is_badfilter() { continue; @@ -302,8 +289,9 @@ impl FlatBufferBuilder { // Redirects are independent of blocking behavior. if filter.is_redirect() { - builder.add_filter(filter.clone(), FilterId::Redirects as u32); + self_.add_filter(filter.clone(), NetworkFilterListId::Redirects); } + type FilterId = NetworkFilterListId; let list_id: FilterId = if filter.is_csp() { FilterId::Csp @@ -326,9 +314,57 @@ impl FlatBufferBuilder { continue; }; - builder.add_filter(filter, list_id as u32); + self_.add_filter(filter, list_id); } - builder.finish(optimize) + self_ + } + fn add_filter(&mut self, network_filter: NetworkFilter, list_id: NetworkFilterListId) { + self.lists[list_id as usize].filters.push(network_filter); + } +} + +impl<'a> FlatSerialize<'a, EngineFlatBuilder<'a>> for NetworkRulesBuilder { + type Output = WIPOffset>>>; + fn serialize( + value: Self, + builder: &mut EngineFlatBuilder<'a>, + ) -> WIPOffset>>> { + let flat_network_rules: Vec<_> = value + .lists + .into_iter() + .map(|list| FlatSerialize::serialize(list, builder)) + .collect(); + builder.raw_builder().create_vector(&flat_network_rules) } } + +pub fn make_flatbuffer_from_rules( + network_filters: Vec, + cosmetic_rules: Vec, + optimize: bool, + version: u32, +) -> VerifiedFlatbufferMemory { + let mut 
builder = EngineFlatBuilder::default(); + + let network_builder = NetworkRulesBuilder::from_rules(network_filters, optimize); + let flat_network_filters = FlatSerialize::serialize(network_builder, &mut builder); + + let cosmetic_builder = CosmeticFilterCacheBuilder::from_rules(cosmetic_rules); + let flat_cosmetic_filters = FlatSerialize::serialize(cosmetic_builder, &mut builder); + + let flat_unique_domains_hashes = builder.write_unique_domains(); + + let root = fb::Engine::create( + &mut builder.fb_builder, + &fb::EngineArgs { + version, + network_rules: Some(flat_network_filters), + unique_domains_hashes: Some(flat_unique_domains_hashes), + cosmetic_filters: Some(flat_cosmetic_filters), + }, + ); + builder.fb_builder.finish(root, None); + // TODO: consider using builder.collapse() to avoid reallocating memory. + VerifiedFlatbufferMemory::from_builder(&builder.fb_builder) +} diff --git a/src/filters/fb_network.rs b/src/filters/fb_network.rs index c6d7940c..50d4d9c4 100644 --- a/src/filters/fb_network.rs +++ b/src/filters/fb_network.rs @@ -2,7 +2,6 @@ use std::collections::HashMap; -use crate::filters::fb_builder::FlatBufferBuilder; use crate::filters::network::{NetworkFilterMask, NetworkFilterMaskHelper, NetworkMatchable}; use crate::flatbuffers::unsafe_tools::{fb_vector_to_slice, VerifiedFlatbufferMemory}; @@ -84,7 +83,12 @@ pub(crate) struct FilterDataContext { impl Default for FilterDataContext { fn default() -> Self { Self { - memory: FlatBufferBuilder::make_flatbuffer(vec![], false), + memory: crate::filters::fb_builder::make_flatbuffer_from_rules( + vec![], + vec![], + false, + 0, + ), unique_domains_hashes_map: HashMap::new(), } } @@ -143,14 +147,14 @@ impl<'a> FlatNetworkFilter<'a> { pub fn include_domains(&self) -> Option<&[u32]> { self.fb_filter .opt_domains() - .map(|data| fb_vector_to_slice(data)) + .map(|data| fb_vector_to_slice(&data)) } #[inline(always)] pub fn exclude_domains(&self) -> Option<&[u32]> { self.fb_filter .opt_not_domains() - 
.map(|data| fb_vector_to_slice(data)) + .map(|data| fb_vector_to_slice(&data)) } #[inline(always)] diff --git a/src/flatbuffers/containers/flat_multimap.rs b/src/flatbuffers/containers/flat_multimap.rs index a90dcc59..b74281eb 100644 --- a/src/flatbuffers/containers/flat_multimap.rs +++ b/src/flatbuffers/containers/flat_multimap.rs @@ -1,7 +1,10 @@ -use std::marker::PhantomData; +use std::{collections::HashMap, marker::PhantomData}; -use crate::flatbuffers::containers::sorted_index::SortedIndex; -use flatbuffers::{Follow, Vector}; +use crate::flatbuffers::containers::{ + flat_serialize::{Builder, FlatSerialize, FlatVec}, + sorted_index::SortedIndex, +}; +use flatbuffers::{Follow, ForwardsUOffset, Vector}; /// A map-like container that uses flatbuffer references. /// Provides O(log n) lookup time using binary search on the sorted index. @@ -52,6 +55,40 @@ where } } +pub(crate) struct FlatMapView<'a, I: Ord, V, Keys> +where + Keys: SortedIndex, + V: Follow<'a>, +{ + keys: Keys, + values: Vector<'a, V>, + _phantom: PhantomData, +} + +impl<'a, I: Ord + Copy, V, Keys> FlatMapView<'a, I, V, Keys> +where + Keys: SortedIndex + Clone, + V: Follow<'a>, +{ + pub fn new(keys: Keys, values: Vector<'a, V>) -> Self { + debug_assert!(keys.len() == values.len()); + Self { + keys, + values, + _phantom: PhantomData, + } + } + + pub fn get(&self, key: I) -> Option<>::Inner> { + let index = self.keys.partition_point(|x| *x < key); + if index < self.keys.len() && self.keys.get(index) == key { + Some(self.values.get(index)) + } else { + None + } + } +} + pub(crate) struct FlatMultiMapViewIterator<'a, I: Ord + Copy, V, Keys> where Keys: SortedIndex, @@ -81,6 +118,88 @@ where } } +pub(crate) type FlatMapStringView<'a, V> = + FlatMultiMapView<'a, &'a str, V, Vector<'a, ForwardsUOffset<&'a str>>>; + +#[derive(Default)] +pub(crate) struct FlatMultiMapBuilder { + map: HashMap>, +} + +pub(crate) struct MapBuilderOutput<'a, I, V, B: Builder<'a>> +where + I: FlatSerialize<'a, B>, + V: 
FlatSerialize<'a, B>, +{ + pub(crate) keys: FlatVec<'a, I, B>, + pub(crate) values: FlatVec<'a, V, B>, +} + +impl FlatMultiMapBuilder { + pub fn from_filter_map(map: HashMap>) -> Self { + Self { map } + } + + pub fn insert(&mut self, key: I, value: V) { + self.map.entry(key).or_default().push(value); + } + + pub fn finish<'a, B: Builder<'a>>(value: Self, builder: &mut B) -> MapBuilderOutput<'a, I, V, B> + where + I: FlatSerialize<'a, B>, + V: FlatSerialize<'a, B>, + { + let mut entries: Vec<_> = value.map.into_iter().collect(); + entries.sort_unstable_by(|(a, _), (b, _)| a.cmp(b)); + let mut indexes = Vec::with_capacity(entries.len()); + let mut values = Vec::with_capacity(entries.len()); + + for (key, mv) in entries.into_iter() { + let index = FlatSerialize::serialize(key, builder); + for value in mv.into_iter() { + indexes.push(index.clone()); + values.push(FlatSerialize::serialize(value, builder)); + } + } + + let indexes_vec = builder.raw_builder().create_vector(&indexes); + let values_vec = builder.raw_builder().create_vector(&values); + + MapBuilderOutput { + keys: indexes_vec, + values: values_vec, + } + } +} + +pub(crate) struct FlatMapBuilder; + +impl FlatMapBuilder { + pub fn finish<'a, I, V, B: Builder<'a>>( + value: HashMap, + builder: &mut B, + ) -> MapBuilderOutput<'a, I, V, B> + where + I: FlatSerialize<'a, B> + Ord, + V: FlatSerialize<'a, B>, + { + let mut entries: Vec<_> = value.into_iter().collect(); + entries.sort_unstable_by(|(a, _), (b, _)| a.cmp(b)); + let mut indexes = Vec::with_capacity(entries.len()); + let mut values = Vec::with_capacity(entries.len()); + + for (key, value) in entries.into_iter() { + indexes.push(FlatSerialize::serialize(key, builder)); + values.push(FlatSerialize::serialize(value, builder)); + } + + MapBuilderOutput { + keys: builder.raw_builder().create_vector(&indexes), + values: builder.raw_builder().create_vector(&values), + } + } +} + #[cfg(test)] #[path = 
"../../../tests/unit/flatbuffers/containers/flat_multimap.rs"] mod unit_tests; diff --git a/src/flatbuffers/containers/flat_serialize.rs b/src/flatbuffers/containers/flat_serialize.rs new file mode 100644 index 00000000..28192019 --- /dev/null +++ b/src/flatbuffers/containers/flat_serialize.rs @@ -0,0 +1,89 @@ +use flatbuffers::{Vector, WIPOffset}; + +pub trait Builder<'a> { + fn create_string(&mut self, s: &str) -> WIPOffset<&'a str>; + fn raw_builder(&mut self) -> &mut flatbuffers::FlatBufferBuilder<'a>; +} + +pub trait FlatSerialize<'b, B: Builder<'b>>: Sized { + type Output: Sized + Clone + flatbuffers::Push + 'b; + fn serialize(value: Self, builder: &mut B) -> Self::Output; +} + +impl<'b, B: Builder<'b>> FlatSerialize<'b, B> for String { + type Output = WIPOffset<&'b str>; + fn serialize(value: Self, builder: &mut B) -> Self::Output { + builder.create_string(&value) + } +} + +impl<'b, B: Builder<'b>> FlatSerialize<'b, B> for &str { + type Output = WIPOffset<&'b str>; + fn serialize(value: Self, builder: &mut B) -> Self::Output { + builder.create_string(value) + } +} + +impl<'b, B: Builder<'b>> FlatSerialize<'b, B> for u32 { + type Output = u32; + fn serialize(value: Self, _builder: &mut B) -> Self::Output { + value + } +} + +impl<'b, B: Builder<'b>> FlatSerialize<'b, B> for u64 { + type Output = u64; + fn serialize(value: Self, _builder: &mut B) -> Self::Output { + value + } +} + +impl<'b, B: Builder<'b>, T: 'b> FlatSerialize<'b, B> for WIPOffset { + type Output = WIPOffset; + fn serialize(value: Self, _builder: &mut B) -> Self::Output { + value + } +} + +impl<'b, B: Builder<'b>, T: FlatSerialize<'b, B>> FlatSerialize<'b, B> for Vec { + type Output = + WIPOffset>::Output as flatbuffers::Push>::Output>>; + fn serialize(value: Self, builder: &mut B) -> Self::Output { + let v = value + .into_iter() + .map(|x| FlatSerialize::serialize(x, builder)) + .collect::>(); + builder.raw_builder().create_vector(&v) + } +} + +pub(crate) type FlatVec<'b, T, B> = + 
WIPOffset>::Output as flatbuffers::Push>::Output>>; +pub(crate) fn serialize_vec_opt<'b, B: Builder<'b>, T: FlatSerialize<'b, B>>( + value: Vec, + builder: &mut B, +) -> Option> { + if value.is_empty() { + None + } else { + Some(FlatSerialize::serialize(value, builder)) + } +} + +impl<'b, B: Builder<'b>, T: FlatSerialize<'b, B> + std::hash::Hash + Eq + Ord> FlatSerialize<'b, B> + for std::collections::HashSet +{ + type Output = + WIPOffset>::Output as flatbuffers::Push>::Output>>; + + fn serialize(value: Self, builder: &mut B) -> Self::Output { + let mut items = value.into_iter().collect::>(); + items.sort_unstable(); + let v = items + .into_iter() + .map(|x| FlatSerialize::serialize(x, builder)) + .collect::>(); + + builder.raw_builder().create_vector(&v) + } +} diff --git a/src/flatbuffers/containers/mod.rs b/src/flatbuffers/containers/mod.rs index 507620de..50164fd2 100644 --- a/src/flatbuffers/containers/mod.rs +++ b/src/flatbuffers/containers/mod.rs @@ -1,3 +1,4 @@ pub(crate) mod flat_multimap; +pub(crate) mod flat_serialize; pub(crate) mod flat_set; pub(crate) mod sorted_index; diff --git a/src/flatbuffers/fb_network_filter.fbs b/src/flatbuffers/fb_network_filter.fbs index 332a91ae..b281b31c 100644 --- a/src/flatbuffers/fb_network_filter.fbs +++ b/src/flatbuffers/fb_network_filter.fbs @@ -29,10 +29,56 @@ table NetworkFilterList { filter_map_values: [NetworkFilter] (required); } +table HostnameSpecificRules { + unhide: [string]; + uninject_script: [string]; + procedural_action: [string]; + procedural_action_exception: [string]; +} + +table CosmeticFilters { + /// Rules that are just the CSS class of an element to be hidden on all sites, e.g. `##.ad`. + simple_class_rules: [string] (required); + + /// Rules that are just the CSS id of an element to be hidden on all sites, e.g. `###banner`. 
+ simple_id_rules: [string] (required); + + /// Rules that are the CSS selector of an element to be hidden on all sites that do not fit + /// into any of the class or id buckets, e.g. `##a[href="https://malware.com"]` + misc_generic_selectors: [string] (required); + + /// Complex class rules - CSS selectors starting with a class, e.g. `##.ad image` + /// These are stored as a multi-map from class name to list of selectors + complex_class_rules_index: [string] (required); + complex_class_rules_values: [string] (required); + + /// Complex id rules - CSS selectors starting with an id, e.g. `###banner > .text a` + /// These are stored as a multi-map from id name to list of selectors + complex_id_rules_index: [string] (required); + complex_id_rules_values: [string] (required); + + /// Hostname-specific hide filters - multi-map from hostname hash to CSS selectors + hostname_hide_index: [uint64] (required); + hostname_hide_values: [string] (required); + + /// Hostname-specific script injection filters - multi-map from hostname hash to script data + /// First byte of each script encodes permission bits to avoid separate permissions array + hostname_inject_script_index: [uint64] (required); + hostname_inject_script_values: [string] (required); + + // Map from hostname(hash) => HostnameSpecificRules using FlatMultiMapBuilder + // Store only one item per domain using FlatMultiMapBuilder (for remaining rule types) + hostname_index: [uint64] (required); + hostname_values: [HostnameSpecificRules] (required); +} + // A root type containing a serialized Engine. // Currently it contains only some of engine fields: // network filters and supporing struct. table Engine { + // Format version. Should be increased when making non-backward-compatible changes. + version: uint32; + // Contains several NetworkFilterList matching to different kinds of lists. // The indexes are matching NetworkFilterListId. // The size must be NetworkFilterListId::Size. 
@@ -40,6 +86,8 @@ table Engine { // Contains hashes for opt_(not)_domains. See opt_domains for details. unique_domains_hashes: [uint64] (required); + + cosmetic_filters: CosmeticFilters (required); } root_type Engine; diff --git a/src/flatbuffers/fb_network_filter_generated.rs b/src/flatbuffers/fb_network_filter_generated.rs index 5b1e7ece..d2179b84 100644 --- a/src/flatbuffers/fb_network_filter_generated.rs +++ b/src/flatbuffers/fb_network_filter_generated.rs @@ -654,6 +654,1213 @@ pub mod fb { ) } } + pub enum HostnameSpecificRulesOffset {} + #[derive(Copy, Clone, PartialEq)] + + pub struct HostnameSpecificRules<'a> { + pub _tab: flatbuffers::Table<'a>, + } + + impl<'a> flatbuffers::Follow<'a> for HostnameSpecificRules<'a> { + type Inner = HostnameSpecificRules<'a>; + #[inline] + unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner { + Self { + _tab: flatbuffers::Table::new(buf, loc), + } + } + } + + impl<'a> HostnameSpecificRules<'a> { + pub const VT_UNHIDE: flatbuffers::VOffsetT = 4; + pub const VT_UNINJECT_SCRIPT: flatbuffers::VOffsetT = 6; + pub const VT_PROCEDURAL_ACTION: flatbuffers::VOffsetT = 8; + pub const VT_PROCEDURAL_ACTION_EXCEPTION: flatbuffers::VOffsetT = 10; + + #[inline] + pub unsafe fn init_from_table(table: flatbuffers::Table<'a>) -> Self { + HostnameSpecificRules { _tab: table } + } + #[allow(unused_mut)] + pub fn create< + 'bldr: 'args, + 'args: 'mut_bldr, + 'mut_bldr, + A: flatbuffers::Allocator + 'bldr, + >( + _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr, A>, + args: &'args HostnameSpecificRulesArgs<'args>, + ) -> flatbuffers::WIPOffset> { + let mut builder = HostnameSpecificRulesBuilder::new(_fbb); + if let Some(x) = args.procedural_action_exception { + builder.add_procedural_action_exception(x); + } + if let Some(x) = args.procedural_action { + builder.add_procedural_action(x); + } + if let Some(x) = args.uninject_script { + builder.add_uninject_script(x); + } + if let Some(x) = args.unhide { + builder.add_unhide(x); + 
} + builder.finish() + } + + pub fn unpack(&self) -> HostnameSpecificRulesT { + let unhide = self + .unhide() + .map(|x| x.iter().map(|s| s.to_string()).collect()); + let uninject_script = self + .uninject_script() + .map(|x| x.iter().map(|s| s.to_string()).collect()); + let procedural_action = self + .procedural_action() + .map(|x| x.iter().map(|s| s.to_string()).collect()); + let procedural_action_exception = self + .procedural_action_exception() + .map(|x| x.iter().map(|s| s.to_string()).collect()); + HostnameSpecificRulesT { + unhide, + uninject_script, + procedural_action, + procedural_action_exception, + } + } + + #[inline] + pub fn unhide( + &self, + ) -> Option>> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab.get::>, + >>(HostnameSpecificRules::VT_UNHIDE, None) + } + } + #[inline] + pub fn uninject_script( + &self, + ) -> Option>> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab.get::>, + >>(HostnameSpecificRules::VT_UNINJECT_SCRIPT, None) + } + } + #[inline] + pub fn procedural_action( + &self, + ) -> Option>> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab.get::>, + >>(HostnameSpecificRules::VT_PROCEDURAL_ACTION, None) + } + } + #[inline] + pub fn procedural_action_exception( + &self, + ) -> Option>> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab.get::>, + >>( + HostnameSpecificRules::VT_PROCEDURAL_ACTION_EXCEPTION, None + ) + } + } + } + + impl flatbuffers::Verifiable for HostnameSpecificRules<'_> { + #[inline] + fn run_verifier( + v: &mut flatbuffers::Verifier, + pos: usize, + ) -> Result<(), flatbuffers::InvalidFlatbuffer> { + use self::flatbuffers::Verifiable; + v.visit_table(pos)? 
+ .visit_field::>, + >>("unhide", Self::VT_UNHIDE, false)? + .visit_field::>, + >>("uninject_script", Self::VT_UNINJECT_SCRIPT, false)? + .visit_field::>, + >>("procedural_action", Self::VT_PROCEDURAL_ACTION, false)? + .visit_field::>, + >>( + "procedural_action_exception", + Self::VT_PROCEDURAL_ACTION_EXCEPTION, + false, + )? + .finish(); + Ok(()) + } + } + pub struct HostnameSpecificRulesArgs<'a> { + pub unhide: Option< + flatbuffers::WIPOffset>>, + >, + pub uninject_script: Option< + flatbuffers::WIPOffset>>, + >, + pub procedural_action: Option< + flatbuffers::WIPOffset>>, + >, + pub procedural_action_exception: Option< + flatbuffers::WIPOffset>>, + >, + } + impl<'a> Default for HostnameSpecificRulesArgs<'a> { + #[inline] + fn default() -> Self { + HostnameSpecificRulesArgs { + unhide: None, + uninject_script: None, + procedural_action: None, + procedural_action_exception: None, + } + } + } + + pub struct HostnameSpecificRulesBuilder<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> { + fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, + start_: flatbuffers::WIPOffset, + } + impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> HostnameSpecificRulesBuilder<'a, 'b, A> { + #[inline] + pub fn add_unhide( + &mut self, + unhide: flatbuffers::WIPOffset< + flatbuffers::Vector<'b, flatbuffers::ForwardsUOffset<&'b str>>, + >, + ) { + self.fbb_.push_slot_always::>( + HostnameSpecificRules::VT_UNHIDE, + unhide, + ); + } + #[inline] + pub fn add_uninject_script( + &mut self, + uninject_script: flatbuffers::WIPOffset< + flatbuffers::Vector<'b, flatbuffers::ForwardsUOffset<&'b str>>, + >, + ) { + self.fbb_.push_slot_always::>( + HostnameSpecificRules::VT_UNINJECT_SCRIPT, + uninject_script, + ); + } + #[inline] + pub fn add_procedural_action( + &mut self, + procedural_action: flatbuffers::WIPOffset< + flatbuffers::Vector<'b, flatbuffers::ForwardsUOffset<&'b str>>, + >, + ) { + self.fbb_.push_slot_always::>( + HostnameSpecificRules::VT_PROCEDURAL_ACTION, + procedural_action, + ); + } 
+ #[inline] + pub fn add_procedural_action_exception( + &mut self, + procedural_action_exception: flatbuffers::WIPOffset< + flatbuffers::Vector<'b, flatbuffers::ForwardsUOffset<&'b str>>, + >, + ) { + self.fbb_.push_slot_always::>( + HostnameSpecificRules::VT_PROCEDURAL_ACTION_EXCEPTION, + procedural_action_exception, + ); + } + #[inline] + pub fn new( + _fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, + ) -> HostnameSpecificRulesBuilder<'a, 'b, A> { + let start = _fbb.start_table(); + HostnameSpecificRulesBuilder { + fbb_: _fbb, + start_: start, + } + } + #[inline] + pub fn finish(self) -> flatbuffers::WIPOffset> { + let o = self.fbb_.end_table(self.start_); + flatbuffers::WIPOffset::new(o.value()) + } + } + + impl core::fmt::Debug for HostnameSpecificRules<'_> { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + let mut ds = f.debug_struct("HostnameSpecificRules"); + ds.field("unhide", &self.unhide()); + ds.field("uninject_script", &self.uninject_script()); + ds.field("procedural_action", &self.procedural_action()); + ds.field( + "procedural_action_exception", + &self.procedural_action_exception(), + ); + ds.finish() + } + } + #[non_exhaustive] + #[derive(Debug, Clone, PartialEq)] + pub struct HostnameSpecificRulesT { + pub unhide: Option>, + pub uninject_script: Option>, + pub procedural_action: Option>, + pub procedural_action_exception: Option>, + } + impl Default for HostnameSpecificRulesT { + fn default() -> Self { + Self { + unhide: None, + uninject_script: None, + procedural_action: None, + procedural_action_exception: None, + } + } + } + impl HostnameSpecificRulesT { + pub fn pack<'b, A: flatbuffers::Allocator + 'b>( + &self, + _fbb: &mut flatbuffers::FlatBufferBuilder<'b, A>, + ) -> flatbuffers::WIPOffset> { + let unhide = self.unhide.as_ref().map(|x| { + let w: Vec<_> = x.iter().map(|s| _fbb.create_string(s)).collect(); + _fbb.create_vector(&w) + }); + let uninject_script = self.uninject_script.as_ref().map(|x| { + let w: 
Vec<_> = x.iter().map(|s| _fbb.create_string(s)).collect(); + _fbb.create_vector(&w) + }); + let procedural_action = self.procedural_action.as_ref().map(|x| { + let w: Vec<_> = x.iter().map(|s| _fbb.create_string(s)).collect(); + _fbb.create_vector(&w) + }); + let procedural_action_exception = self.procedural_action_exception.as_ref().map(|x| { + let w: Vec<_> = x.iter().map(|s| _fbb.create_string(s)).collect(); + _fbb.create_vector(&w) + }); + HostnameSpecificRules::create( + _fbb, + &HostnameSpecificRulesArgs { + unhide, + uninject_script, + procedural_action, + procedural_action_exception, + }, + ) + } + } + pub enum CosmeticFiltersOffset {} + #[derive(Copy, Clone, PartialEq)] + + pub struct CosmeticFilters<'a> { + pub _tab: flatbuffers::Table<'a>, + } + + impl<'a> flatbuffers::Follow<'a> for CosmeticFilters<'a> { + type Inner = CosmeticFilters<'a>; + #[inline] + unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner { + Self { + _tab: flatbuffers::Table::new(buf, loc), + } + } + } + + impl<'a> CosmeticFilters<'a> { + pub const VT_SIMPLE_CLASS_RULES: flatbuffers::VOffsetT = 4; + pub const VT_SIMPLE_ID_RULES: flatbuffers::VOffsetT = 6; + pub const VT_MISC_GENERIC_SELECTORS: flatbuffers::VOffsetT = 8; + pub const VT_COMPLEX_CLASS_RULES_INDEX: flatbuffers::VOffsetT = 10; + pub const VT_COMPLEX_CLASS_RULES_VALUES: flatbuffers::VOffsetT = 12; + pub const VT_COMPLEX_ID_RULES_INDEX: flatbuffers::VOffsetT = 14; + pub const VT_COMPLEX_ID_RULES_VALUES: flatbuffers::VOffsetT = 16; + pub const VT_HOSTNAME_HIDE_INDEX: flatbuffers::VOffsetT = 18; + pub const VT_HOSTNAME_HIDE_VALUES: flatbuffers::VOffsetT = 20; + pub const VT_HOSTNAME_INJECT_SCRIPT_INDEX: flatbuffers::VOffsetT = 22; + pub const VT_HOSTNAME_INJECT_SCRIPT_VALUES: flatbuffers::VOffsetT = 24; + pub const VT_HOSTNAME_INDEX: flatbuffers::VOffsetT = 26; + pub const VT_HOSTNAME_VALUES: flatbuffers::VOffsetT = 28; + + #[inline] + pub unsafe fn init_from_table(table: flatbuffers::Table<'a>) -> Self { + 
CosmeticFilters { _tab: table } + } + #[allow(unused_mut)] + pub fn create< + 'bldr: 'args, + 'args: 'mut_bldr, + 'mut_bldr, + A: flatbuffers::Allocator + 'bldr, + >( + _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr, A>, + args: &'args CosmeticFiltersArgs<'args>, + ) -> flatbuffers::WIPOffset> { + let mut builder = CosmeticFiltersBuilder::new(_fbb); + if let Some(x) = args.hostname_values { + builder.add_hostname_values(x); + } + if let Some(x) = args.hostname_index { + builder.add_hostname_index(x); + } + if let Some(x) = args.hostname_inject_script_values { + builder.add_hostname_inject_script_values(x); + } + if let Some(x) = args.hostname_inject_script_index { + builder.add_hostname_inject_script_index(x); + } + if let Some(x) = args.hostname_hide_values { + builder.add_hostname_hide_values(x); + } + if let Some(x) = args.hostname_hide_index { + builder.add_hostname_hide_index(x); + } + if let Some(x) = args.complex_id_rules_values { + builder.add_complex_id_rules_values(x); + } + if let Some(x) = args.complex_id_rules_index { + builder.add_complex_id_rules_index(x); + } + if let Some(x) = args.complex_class_rules_values { + builder.add_complex_class_rules_values(x); + } + if let Some(x) = args.complex_class_rules_index { + builder.add_complex_class_rules_index(x); + } + if let Some(x) = args.misc_generic_selectors { + builder.add_misc_generic_selectors(x); + } + if let Some(x) = args.simple_id_rules { + builder.add_simple_id_rules(x); + } + if let Some(x) = args.simple_class_rules { + builder.add_simple_class_rules(x); + } + builder.finish() + } + + pub fn unpack(&self) -> CosmeticFiltersT { + let simple_class_rules = { + let x = self.simple_class_rules(); + x.iter().map(|s| s.to_string()).collect() + }; + let simple_id_rules = { + let x = self.simple_id_rules(); + x.iter().map(|s| s.to_string()).collect() + }; + let misc_generic_selectors = { + let x = self.misc_generic_selectors(); + x.iter().map(|s| s.to_string()).collect() + }; + let 
complex_class_rules_index = { + let x = self.complex_class_rules_index(); + x.iter().map(|s| s.to_string()).collect() + }; + let complex_class_rules_values = { + let x = self.complex_class_rules_values(); + x.iter().map(|s| s.to_string()).collect() + }; + let complex_id_rules_index = { + let x = self.complex_id_rules_index(); + x.iter().map(|s| s.to_string()).collect() + }; + let complex_id_rules_values = { + let x = self.complex_id_rules_values(); + x.iter().map(|s| s.to_string()).collect() + }; + let hostname_hide_index = { + let x = self.hostname_hide_index(); + x.into_iter().collect() + }; + let hostname_hide_values = { + let x = self.hostname_hide_values(); + x.iter().map(|s| s.to_string()).collect() + }; + let hostname_inject_script_index = { + let x = self.hostname_inject_script_index(); + x.into_iter().collect() + }; + let hostname_inject_script_values = { + let x = self.hostname_inject_script_values(); + x.iter().map(|s| s.to_string()).collect() + }; + let hostname_index = { + let x = self.hostname_index(); + x.into_iter().collect() + }; + let hostname_values = { + let x = self.hostname_values(); + x.iter().map(|t| t.unpack()).collect() + }; + CosmeticFiltersT { + simple_class_rules, + simple_id_rules, + misc_generic_selectors, + complex_class_rules_index, + complex_class_rules_values, + complex_id_rules_index, + complex_id_rules_values, + hostname_hide_index, + hostname_hide_values, + hostname_inject_script_index, + hostname_inject_script_values, + hostname_index, + hostname_values, + } + } + + /// Rules that are just the CSS class of an element to be hidden on all sites, e.g. `##.ad`. 
+ #[inline] + pub fn simple_class_rules( + &self, + ) -> flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset<&'a str>> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab + .get::>, + >>(CosmeticFilters::VT_SIMPLE_CLASS_RULES, None) + .unwrap() + } + } + /// Rules that are just the CSS id of an element to be hidden on all sites, e.g. `###banner`. + #[inline] + pub fn simple_id_rules( + &self, + ) -> flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset<&'a str>> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab + .get::>, + >>(CosmeticFilters::VT_SIMPLE_ID_RULES, None) + .unwrap() + } + } + /// Rules that are the CSS selector of an element to be hidden on all sites that do not fit + /// into any of the class or id buckets, e.g. `##a[href="https://malware.com"]` + #[inline] + pub fn misc_generic_selectors( + &self, + ) -> flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset<&'a str>> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab + .get::>, + >>(CosmeticFilters::VT_MISC_GENERIC_SELECTORS, None) + .unwrap() + } + } + /// Complex class rules - CSS selectors starting with a class, e.g. 
`##.ad image` + /// These are stored as a multi-map from class name to list of selectors + #[inline] + pub fn complex_class_rules_index( + &self, + ) -> flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset<&'a str>> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab + .get::>, + >>(CosmeticFilters::VT_COMPLEX_CLASS_RULES_INDEX, None) + .unwrap() + } + } + #[inline] + pub fn complex_class_rules_values( + &self, + ) -> flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset<&'a str>> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab + .get::>, + >>(CosmeticFilters::VT_COMPLEX_CLASS_RULES_VALUES, None) + .unwrap() + } + } + /// Complex id rules - CSS selectors starting with an id, e.g. `###banner > .text a` + /// These are stored as a multi-map from id name to list of selectors + #[inline] + pub fn complex_id_rules_index( + &self, + ) -> flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset<&'a str>> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab + .get::>, + >>(CosmeticFilters::VT_COMPLEX_ID_RULES_INDEX, None) + .unwrap() + } + } + #[inline] + pub fn complex_id_rules_values( + &self, + ) -> flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset<&'a str>> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab + .get::>, + >>(CosmeticFilters::VT_COMPLEX_ID_RULES_VALUES, None) + .unwrap() + } + } + /// Hostname-specific hide filters - multi-map from hostname hash to CSS selectors + #[inline] + pub fn hostname_hide_index(&self) -> flatbuffers::Vector<'a, u64> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab + .get::>>( + CosmeticFilters::VT_HOSTNAME_HIDE_INDEX, + None, + ) + 
.unwrap() + } + } + #[inline] + pub fn hostname_hide_values( + &self, + ) -> flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset<&'a str>> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab + .get::>, + >>(CosmeticFilters::VT_HOSTNAME_HIDE_VALUES, None) + .unwrap() + } + } + /// Hostname-specific script injection filters - multi-map from hostname hash to script data + /// First byte of each script encodes permission bits to avoid separate permissions array + #[inline] + pub fn hostname_inject_script_index(&self) -> flatbuffers::Vector<'a, u64> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab + .get::>>( + CosmeticFilters::VT_HOSTNAME_INJECT_SCRIPT_INDEX, + None, + ) + .unwrap() + } + } + #[inline] + pub fn hostname_inject_script_values( + &self, + ) -> flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset<&'a str>> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab + .get::>, + >>(CosmeticFilters::VT_HOSTNAME_INJECT_SCRIPT_VALUES, None) + .unwrap() + } + } + #[inline] + pub fn hostname_index(&self) -> flatbuffers::Vector<'a, u64> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab + .get::>>( + CosmeticFilters::VT_HOSTNAME_INDEX, + None, + ) + .unwrap() + } + } + #[inline] + pub fn hostname_values( + &self, + ) -> flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset>> + { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab + .get::, + >, + >>(CosmeticFilters::VT_HOSTNAME_VALUES, None) + .unwrap() + } + } + } + + impl flatbuffers::Verifiable for CosmeticFilters<'_> { + #[inline] + fn run_verifier( + v: &mut flatbuffers::Verifier, + pos: usize, + ) -> Result<(), 
flatbuffers::InvalidFlatbuffer> { + use self::flatbuffers::Verifiable; + v.visit_table(pos)? + .visit_field::>, + >>("simple_class_rules", Self::VT_SIMPLE_CLASS_RULES, true)? + .visit_field::>, + >>("simple_id_rules", Self::VT_SIMPLE_ID_RULES, true)? + .visit_field::>, + >>( + "misc_generic_selectors", + Self::VT_MISC_GENERIC_SELECTORS, + true, + )? + .visit_field::>, + >>( + "complex_class_rules_index", + Self::VT_COMPLEX_CLASS_RULES_INDEX, + true, + )? + .visit_field::>, + >>( + "complex_class_rules_values", + Self::VT_COMPLEX_CLASS_RULES_VALUES, + true, + )? + .visit_field::>, + >>( + "complex_id_rules_index", + Self::VT_COMPLEX_ID_RULES_INDEX, + true, + )? + .visit_field::>, + >>( + "complex_id_rules_values", + Self::VT_COMPLEX_ID_RULES_VALUES, + true, + )? + .visit_field::>>( + "hostname_hide_index", + Self::VT_HOSTNAME_HIDE_INDEX, + true, + )? + .visit_field::>, + >>("hostname_hide_values", Self::VT_HOSTNAME_HIDE_VALUES, true)? + .visit_field::>>( + "hostname_inject_script_index", + Self::VT_HOSTNAME_INJECT_SCRIPT_INDEX, + true, + )? + .visit_field::>, + >>( + "hostname_inject_script_values", + Self::VT_HOSTNAME_INJECT_SCRIPT_VALUES, + true, + )? + .visit_field::>>( + "hostname_index", + Self::VT_HOSTNAME_INDEX, + true, + )? + .visit_field::>, + >>("hostname_values", Self::VT_HOSTNAME_VALUES, true)? 
+ .finish(); + Ok(()) + } + } + pub struct CosmeticFiltersArgs<'a> { + pub simple_class_rules: Option< + flatbuffers::WIPOffset>>, + >, + pub simple_id_rules: Option< + flatbuffers::WIPOffset>>, + >, + pub misc_generic_selectors: Option< + flatbuffers::WIPOffset>>, + >, + pub complex_class_rules_index: Option< + flatbuffers::WIPOffset>>, + >, + pub complex_class_rules_values: Option< + flatbuffers::WIPOffset>>, + >, + pub complex_id_rules_index: Option< + flatbuffers::WIPOffset>>, + >, + pub complex_id_rules_values: Option< + flatbuffers::WIPOffset>>, + >, + pub hostname_hide_index: Option>>, + pub hostname_hide_values: Option< + flatbuffers::WIPOffset>>, + >, + pub hostname_inject_script_index: + Option>>, + pub hostname_inject_script_values: Option< + flatbuffers::WIPOffset>>, + >, + pub hostname_index: Option>>, + pub hostname_values: Option< + flatbuffers::WIPOffset< + flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset>>, + >, + >, + } + impl<'a> Default for CosmeticFiltersArgs<'a> { + #[inline] + fn default() -> Self { + CosmeticFiltersArgs { + simple_class_rules: None, // required field + simple_id_rules: None, // required field + misc_generic_selectors: None, // required field + complex_class_rules_index: None, // required field + complex_class_rules_values: None, // required field + complex_id_rules_index: None, // required field + complex_id_rules_values: None, // required field + hostname_hide_index: None, // required field + hostname_hide_values: None, // required field + hostname_inject_script_index: None, // required field + hostname_inject_script_values: None, // required field + hostname_index: None, // required field + hostname_values: None, // required field + } + } + } + + pub struct CosmeticFiltersBuilder<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> { + fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, + start_: flatbuffers::WIPOffset, + } + impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> CosmeticFiltersBuilder<'a, 'b, A> { + #[inline] + pub 
fn add_simple_class_rules( + &mut self, + simple_class_rules: flatbuffers::WIPOffset< + flatbuffers::Vector<'b, flatbuffers::ForwardsUOffset<&'b str>>, + >, + ) { + self.fbb_.push_slot_always::>( + CosmeticFilters::VT_SIMPLE_CLASS_RULES, + simple_class_rules, + ); + } + #[inline] + pub fn add_simple_id_rules( + &mut self, + simple_id_rules: flatbuffers::WIPOffset< + flatbuffers::Vector<'b, flatbuffers::ForwardsUOffset<&'b str>>, + >, + ) { + self.fbb_.push_slot_always::>( + CosmeticFilters::VT_SIMPLE_ID_RULES, + simple_id_rules, + ); + } + #[inline] + pub fn add_misc_generic_selectors( + &mut self, + misc_generic_selectors: flatbuffers::WIPOffset< + flatbuffers::Vector<'b, flatbuffers::ForwardsUOffset<&'b str>>, + >, + ) { + self.fbb_.push_slot_always::>( + CosmeticFilters::VT_MISC_GENERIC_SELECTORS, + misc_generic_selectors, + ); + } + #[inline] + pub fn add_complex_class_rules_index( + &mut self, + complex_class_rules_index: flatbuffers::WIPOffset< + flatbuffers::Vector<'b, flatbuffers::ForwardsUOffset<&'b str>>, + >, + ) { + self.fbb_.push_slot_always::>( + CosmeticFilters::VT_COMPLEX_CLASS_RULES_INDEX, + complex_class_rules_index, + ); + } + #[inline] + pub fn add_complex_class_rules_values( + &mut self, + complex_class_rules_values: flatbuffers::WIPOffset< + flatbuffers::Vector<'b, flatbuffers::ForwardsUOffset<&'b str>>, + >, + ) { + self.fbb_.push_slot_always::>( + CosmeticFilters::VT_COMPLEX_CLASS_RULES_VALUES, + complex_class_rules_values, + ); + } + #[inline] + pub fn add_complex_id_rules_index( + &mut self, + complex_id_rules_index: flatbuffers::WIPOffset< + flatbuffers::Vector<'b, flatbuffers::ForwardsUOffset<&'b str>>, + >, + ) { + self.fbb_.push_slot_always::>( + CosmeticFilters::VT_COMPLEX_ID_RULES_INDEX, + complex_id_rules_index, + ); + } + #[inline] + pub fn add_complex_id_rules_values( + &mut self, + complex_id_rules_values: flatbuffers::WIPOffset< + flatbuffers::Vector<'b, flatbuffers::ForwardsUOffset<&'b str>>, + >, + ) { + 
self.fbb_.push_slot_always::>( + CosmeticFilters::VT_COMPLEX_ID_RULES_VALUES, + complex_id_rules_values, + ); + } + #[inline] + pub fn add_hostname_hide_index( + &mut self, + hostname_hide_index: flatbuffers::WIPOffset>, + ) { + self.fbb_.push_slot_always::>( + CosmeticFilters::VT_HOSTNAME_HIDE_INDEX, + hostname_hide_index, + ); + } + #[inline] + pub fn add_hostname_hide_values( + &mut self, + hostname_hide_values: flatbuffers::WIPOffset< + flatbuffers::Vector<'b, flatbuffers::ForwardsUOffset<&'b str>>, + >, + ) { + self.fbb_.push_slot_always::>( + CosmeticFilters::VT_HOSTNAME_HIDE_VALUES, + hostname_hide_values, + ); + } + #[inline] + pub fn add_hostname_inject_script_index( + &mut self, + hostname_inject_script_index: flatbuffers::WIPOffset>, + ) { + self.fbb_.push_slot_always::>( + CosmeticFilters::VT_HOSTNAME_INJECT_SCRIPT_INDEX, + hostname_inject_script_index, + ); + } + #[inline] + pub fn add_hostname_inject_script_values( + &mut self, + hostname_inject_script_values: flatbuffers::WIPOffset< + flatbuffers::Vector<'b, flatbuffers::ForwardsUOffset<&'b str>>, + >, + ) { + self.fbb_.push_slot_always::>( + CosmeticFilters::VT_HOSTNAME_INJECT_SCRIPT_VALUES, + hostname_inject_script_values, + ); + } + #[inline] + pub fn add_hostname_index( + &mut self, + hostname_index: flatbuffers::WIPOffset>, + ) { + self.fbb_.push_slot_always::>( + CosmeticFilters::VT_HOSTNAME_INDEX, + hostname_index, + ); + } + #[inline] + pub fn add_hostname_values( + &mut self, + hostname_values: flatbuffers::WIPOffset< + flatbuffers::Vector<'b, flatbuffers::ForwardsUOffset>>, + >, + ) { + self.fbb_.push_slot_always::>( + CosmeticFilters::VT_HOSTNAME_VALUES, + hostname_values, + ); + } + #[inline] + pub fn new( + _fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, + ) -> CosmeticFiltersBuilder<'a, 'b, A> { + let start = _fbb.start_table(); + CosmeticFiltersBuilder { + fbb_: _fbb, + start_: start, + } + } + #[inline] + pub fn finish(self) -> flatbuffers::WIPOffset> { + let o = 
self.fbb_.end_table(self.start_); + self.fbb_.required( + o, + CosmeticFilters::VT_SIMPLE_CLASS_RULES, + "simple_class_rules", + ); + self.fbb_ + .required(o, CosmeticFilters::VT_SIMPLE_ID_RULES, "simple_id_rules"); + self.fbb_.required( + o, + CosmeticFilters::VT_MISC_GENERIC_SELECTORS, + "misc_generic_selectors", + ); + self.fbb_.required( + o, + CosmeticFilters::VT_COMPLEX_CLASS_RULES_INDEX, + "complex_class_rules_index", + ); + self.fbb_.required( + o, + CosmeticFilters::VT_COMPLEX_CLASS_RULES_VALUES, + "complex_class_rules_values", + ); + self.fbb_.required( + o, + CosmeticFilters::VT_COMPLEX_ID_RULES_INDEX, + "complex_id_rules_index", + ); + self.fbb_.required( + o, + CosmeticFilters::VT_COMPLEX_ID_RULES_VALUES, + "complex_id_rules_values", + ); + self.fbb_.required( + o, + CosmeticFilters::VT_HOSTNAME_HIDE_INDEX, + "hostname_hide_index", + ); + self.fbb_.required( + o, + CosmeticFilters::VT_HOSTNAME_HIDE_VALUES, + "hostname_hide_values", + ); + self.fbb_.required( + o, + CosmeticFilters::VT_HOSTNAME_INJECT_SCRIPT_INDEX, + "hostname_inject_script_index", + ); + self.fbb_.required( + o, + CosmeticFilters::VT_HOSTNAME_INJECT_SCRIPT_VALUES, + "hostname_inject_script_values", + ); + self.fbb_ + .required(o, CosmeticFilters::VT_HOSTNAME_INDEX, "hostname_index"); + self.fbb_ + .required(o, CosmeticFilters::VT_HOSTNAME_VALUES, "hostname_values"); + flatbuffers::WIPOffset::new(o.value()) + } + } + + impl core::fmt::Debug for CosmeticFilters<'_> { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + let mut ds = f.debug_struct("CosmeticFilters"); + ds.field("simple_class_rules", &self.simple_class_rules()); + ds.field("simple_id_rules", &self.simple_id_rules()); + ds.field("misc_generic_selectors", &self.misc_generic_selectors()); + ds.field( + "complex_class_rules_index", + &self.complex_class_rules_index(), + ); + ds.field( + "complex_class_rules_values", + &self.complex_class_rules_values(), + ); + ds.field("complex_id_rules_index", 
&self.complex_id_rules_index()); + ds.field("complex_id_rules_values", &self.complex_id_rules_values()); + ds.field("hostname_hide_index", &self.hostname_hide_index()); + ds.field("hostname_hide_values", &self.hostname_hide_values()); + ds.field( + "hostname_inject_script_index", + &self.hostname_inject_script_index(), + ); + ds.field( + "hostname_inject_script_values", + &self.hostname_inject_script_values(), + ); + ds.field("hostname_index", &self.hostname_index()); + ds.field("hostname_values", &self.hostname_values()); + ds.finish() + } + } + #[non_exhaustive] + #[derive(Debug, Clone, PartialEq)] + pub struct CosmeticFiltersT { + pub simple_class_rules: Vec, + pub simple_id_rules: Vec, + pub misc_generic_selectors: Vec, + pub complex_class_rules_index: Vec, + pub complex_class_rules_values: Vec, + pub complex_id_rules_index: Vec, + pub complex_id_rules_values: Vec, + pub hostname_hide_index: Vec, + pub hostname_hide_values: Vec, + pub hostname_inject_script_index: Vec, + pub hostname_inject_script_values: Vec, + pub hostname_index: Vec, + pub hostname_values: Vec, + } + impl Default for CosmeticFiltersT { + fn default() -> Self { + Self { + simple_class_rules: Default::default(), + simple_id_rules: Default::default(), + misc_generic_selectors: Default::default(), + complex_class_rules_index: Default::default(), + complex_class_rules_values: Default::default(), + complex_id_rules_index: Default::default(), + complex_id_rules_values: Default::default(), + hostname_hide_index: Default::default(), + hostname_hide_values: Default::default(), + hostname_inject_script_index: Default::default(), + hostname_inject_script_values: Default::default(), + hostname_index: Default::default(), + hostname_values: Default::default(), + } + } + } + impl CosmeticFiltersT { + pub fn pack<'b, A: flatbuffers::Allocator + 'b>( + &self, + _fbb: &mut flatbuffers::FlatBufferBuilder<'b, A>, + ) -> flatbuffers::WIPOffset> { + let simple_class_rules = Some({ + let x = 
&self.simple_class_rules; + let w: Vec<_> = x.iter().map(|s| _fbb.create_string(s)).collect(); + _fbb.create_vector(&w) + }); + let simple_id_rules = Some({ + let x = &self.simple_id_rules; + let w: Vec<_> = x.iter().map(|s| _fbb.create_string(s)).collect(); + _fbb.create_vector(&w) + }); + let misc_generic_selectors = Some({ + let x = &self.misc_generic_selectors; + let w: Vec<_> = x.iter().map(|s| _fbb.create_string(s)).collect(); + _fbb.create_vector(&w) + }); + let complex_class_rules_index = Some({ + let x = &self.complex_class_rules_index; + let w: Vec<_> = x.iter().map(|s| _fbb.create_string(s)).collect(); + _fbb.create_vector(&w) + }); + let complex_class_rules_values = Some({ + let x = &self.complex_class_rules_values; + let w: Vec<_> = x.iter().map(|s| _fbb.create_string(s)).collect(); + _fbb.create_vector(&w) + }); + let complex_id_rules_index = Some({ + let x = &self.complex_id_rules_index; + let w: Vec<_> = x.iter().map(|s| _fbb.create_string(s)).collect(); + _fbb.create_vector(&w) + }); + let complex_id_rules_values = Some({ + let x = &self.complex_id_rules_values; + let w: Vec<_> = x.iter().map(|s| _fbb.create_string(s)).collect(); + _fbb.create_vector(&w) + }); + let hostname_hide_index = Some({ + let x = &self.hostname_hide_index; + _fbb.create_vector(x) + }); + let hostname_hide_values = Some({ + let x = &self.hostname_hide_values; + let w: Vec<_> = x.iter().map(|s| _fbb.create_string(s)).collect(); + _fbb.create_vector(&w) + }); + let hostname_inject_script_index = Some({ + let x = &self.hostname_inject_script_index; + _fbb.create_vector(x) + }); + let hostname_inject_script_values = Some({ + let x = &self.hostname_inject_script_values; + let w: Vec<_> = x.iter().map(|s| _fbb.create_string(s)).collect(); + _fbb.create_vector(&w) + }); + let hostname_index = Some({ + let x = &self.hostname_index; + _fbb.create_vector(x) + }); + let hostname_values = Some({ + let x = &self.hostname_values; + let w: Vec<_> = x.iter().map(|t| t.pack(_fbb)).collect(); 
+ _fbb.create_vector(&w) + }); + CosmeticFilters::create( + _fbb, + &CosmeticFiltersArgs { + simple_class_rules, + simple_id_rules, + misc_generic_selectors, + complex_class_rules_index, + complex_class_rules_values, + complex_id_rules_index, + complex_id_rules_values, + hostname_hide_index, + hostname_hide_values, + hostname_inject_script_index, + hostname_inject_script_values, + hostname_index, + hostname_values, + }, + ) + } + } pub enum EngineOffset {} #[derive(Copy, Clone, PartialEq)] @@ -672,8 +1879,10 @@ pub mod fb { } impl<'a> Engine<'a> { - pub const VT_NETWORK_RULES: flatbuffers::VOffsetT = 4; - pub const VT_UNIQUE_DOMAINS_HASHES: flatbuffers::VOffsetT = 6; + pub const VT_VERSION: flatbuffers::VOffsetT = 4; + pub const VT_NETWORK_RULES: flatbuffers::VOffsetT = 6; + pub const VT_UNIQUE_DOMAINS_HASHES: flatbuffers::VOffsetT = 8; + pub const VT_COSMETIC_FILTERS: flatbuffers::VOffsetT = 10; #[inline] pub unsafe fn init_from_table(table: flatbuffers::Table<'a>) -> Self { @@ -690,16 +1899,21 @@ pub mod fb { args: &'args EngineArgs<'args>, ) -> flatbuffers::WIPOffset> { let mut builder = EngineBuilder::new(_fbb); + if let Some(x) = args.cosmetic_filters { + builder.add_cosmetic_filters(x); + } if let Some(x) = args.unique_domains_hashes { builder.add_unique_domains_hashes(x); } if let Some(x) = args.network_rules { builder.add_network_rules(x); } + builder.add_version(args.version); builder.finish() } pub fn unpack(&self) -> EngineT { + let version = self.version(); let network_rules = { let x = self.network_rules(); x.iter().map(|t| t.unpack()).collect() @@ -708,12 +1922,25 @@ pub mod fb { let x = self.unique_domains_hashes(); x.into_iter().collect() }; + let cosmetic_filters = { + let x = self.cosmetic_filters(); + Box::new(x.unpack()) + }; EngineT { + version, network_rules, unique_domains_hashes, + cosmetic_filters, } } + #[inline] + pub fn version(&self) -> u32 { + // Safety: + // Created from valid Table for this object + // which contains a valid value in 
this slot + unsafe { self._tab.get::(Engine::VT_VERSION, Some(0)).unwrap() } + } #[inline] pub fn network_rules( &self, @@ -743,6 +1970,20 @@ pub mod fb { .unwrap() } } + #[inline] + pub fn cosmetic_filters(&self) -> CosmeticFilters<'a> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab + .get::>( + Engine::VT_COSMETIC_FILTERS, + None, + ) + .unwrap() + } + } } impl flatbuffers::Verifiable for Engine<'_> { @@ -753,6 +1994,7 @@ pub mod fb { ) -> Result<(), flatbuffers::InvalidFlatbuffer> { use self::flatbuffers::Verifiable; v.visit_table(pos)? + .visit_field::("version", Self::VT_VERSION, false)? .visit_field::>, >>("network_rules", Self::VT_NETWORK_RULES, true)? @@ -761,24 +2003,33 @@ pub mod fb { Self::VT_UNIQUE_DOMAINS_HASHES, true, )? + .visit_field::>( + "cosmetic_filters", + Self::VT_COSMETIC_FILTERS, + true, + )? .finish(); Ok(()) } } pub struct EngineArgs<'a> { + pub version: u32, pub network_rules: Option< flatbuffers::WIPOffset< flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset>>, >, >, pub unique_domains_hashes: Option>>, + pub cosmetic_filters: Option>>, } impl<'a> Default for EngineArgs<'a> { #[inline] fn default() -> Self { EngineArgs { + version: 0, network_rules: None, // required field unique_domains_hashes: None, // required field + cosmetic_filters: None, // required field } } } @@ -788,6 +2039,10 @@ pub mod fb { start_: flatbuffers::WIPOffset, } impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> EngineBuilder<'a, 'b, A> { + #[inline] + pub fn add_version(&mut self, version: u32) { + self.fbb_.push_slot::(Engine::VT_VERSION, version, 0); + } #[inline] pub fn add_network_rules( &mut self, @@ -811,6 +2066,17 @@ pub mod fb { ); } #[inline] + pub fn add_cosmetic_filters( + &mut self, + cosmetic_filters: flatbuffers::WIPOffset>, + ) { + self.fbb_ + .push_slot_always::>( + Engine::VT_COSMETIC_FILTERS, + cosmetic_filters, + ); + } + #[inline] pub fn new( _fbb: &'b mut 
flatbuffers::FlatBufferBuilder<'a, A>, ) -> EngineBuilder<'a, 'b, A> { @@ -827,6 +2093,8 @@ pub mod fb { .required(o, Engine::VT_NETWORK_RULES, "network_rules"); self.fbb_ .required(o, Engine::VT_UNIQUE_DOMAINS_HASHES, "unique_domains_hashes"); + self.fbb_ + .required(o, Engine::VT_COSMETIC_FILTERS, "cosmetic_filters"); flatbuffers::WIPOffset::new(o.value()) } } @@ -834,22 +2102,28 @@ pub mod fb { impl core::fmt::Debug for Engine<'_> { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { let mut ds = f.debug_struct("Engine"); + ds.field("version", &self.version()); ds.field("network_rules", &self.network_rules()); ds.field("unique_domains_hashes", &self.unique_domains_hashes()); + ds.field("cosmetic_filters", &self.cosmetic_filters()); ds.finish() } } #[non_exhaustive] #[derive(Debug, Clone, PartialEq)] pub struct EngineT { + pub version: u32, pub network_rules: Vec, pub unique_domains_hashes: Vec, + pub cosmetic_filters: Box, } impl Default for EngineT { fn default() -> Self { Self { + version: 0, network_rules: Default::default(), unique_domains_hashes: Default::default(), + cosmetic_filters: Default::default(), } } } @@ -858,6 +2132,7 @@ pub mod fb { &self, _fbb: &mut flatbuffers::FlatBufferBuilder<'b, A>, ) -> flatbuffers::WIPOffset> { + let version = self.version; let network_rules = Some({ let x = &self.network_rules; let w: Vec<_> = x.iter().map(|t| t.pack(_fbb)).collect(); @@ -867,11 +2142,17 @@ pub mod fb { let x = &self.unique_domains_hashes; _fbb.create_vector(x) }); + let cosmetic_filters = Some({ + let x = &self.cosmetic_filters; + x.pack(_fbb) + }); Engine::create( _fbb, &EngineArgs { + version, network_rules, unique_domains_hashes, + cosmetic_filters, }, ) } diff --git a/src/flatbuffers/unsafe_tools.rs b/src/flatbuffers/unsafe_tools.rs index f53edaa5..48623853 100644 --- a/src/flatbuffers/unsafe_tools.rs +++ b/src/flatbuffers/unsafe_tools.rs @@ -3,15 +3,14 @@ use crate::filters::fb_network::flat::fb; // Minimum alignment for the 
beginning of the flatbuffer data. -// Should be 4 while we support armv7 and x86_32. -const MIN_ALIGNMENT: usize = 4; +const MIN_ALIGNMENT: usize = 8; /// Converts a flatbuffers Vector to a slice. /// # Safety /// This function uses unsafe code to convert flatbuffer vector bytes to a slice. /// It asserts the vector data is properly aligned and sized. #[inline(always)] -pub fn fb_vector_to_slice(vector: flatbuffers::Vector<'_, T>) -> &[T] { +pub fn fb_vector_to_slice<'a, T>(vector: &flatbuffers::Vector<'a, T>) -> &'a [T] { let bytes = vector.bytes(); const fn static_assert_alignment() { @@ -20,7 +19,7 @@ pub fn fb_vector_to_slice(vector: flatbuffers::Vector<'_, T>) -> &[T] { // the alignment of the data must be a divisor of MIN_ALIGNMENT. assert!(MIN_ALIGNMENT % std::mem::size_of::() == 0); } - let _ = static_assert_alignment::; + const { static_assert_alignment::() }; assert!(bytes.len() % std::mem::size_of::() == 0); assert!(bytes.as_ptr() as usize % std::mem::align_of::() == 0); diff --git a/src/lib.rs b/src/lib.rs index d6327d2d..0d95e5f0 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -20,7 +20,6 @@ pub mod blocker; #[cfg(feature = "content-blocking")] pub mod content_blocking; pub mod cosmetic_filter_cache; -mod data_format; mod engine; pub mod filters; mod flatbuffers; diff --git a/src/network_filter_list.rs b/src/network_filter_list.rs index c0d2f6dc..9bb076db 100644 --- a/src/network_filter_list.rs +++ b/src/network_filter_list.rs @@ -50,12 +50,6 @@ impl NetworkFilterMaskHelper for CheckResult { } } -#[derive(Debug, Clone)] -pub enum NetworkFilterListParsingError { - InvalidFlatbuffer(flatbuffers::InvalidFlatbuffer), - UniqueDomainsOutOfBounds(usize), -} - /// Internal structure to keep track of a collection of network filters. 
pub(crate) struct NetworkFilterList<'a> { pub(crate) list: fb::NetworkFilterList<'a>, @@ -69,7 +63,7 @@ impl NetworkFilterList<'_> { pub fn get_filter_map(&self) -> FlatNetworkFilterMap { let filters_list = &self.list; FlatNetworkFilterMap::new( - fb_vector_to_slice(filters_list.filter_map_index()), + fb_vector_to_slice(&filters_list.filter_map_index()), filters_list.filter_map_values(), ) } diff --git a/src/resources/mod.rs b/src/resources/mod.rs index 7fda7af2..eb7c2321 100644 --- a/src/resources/mod.rs +++ b/src/resources/mod.rs @@ -101,6 +101,10 @@ impl PermissionMask { Self(bits) } + pub fn to_bits(&self) -> u8 { + self.0 + } + /// Can `filter_mask` authorize injecting a resource requiring `self` permissions? pub fn is_injectable_by(&self, filter_mask: PermissionMask) -> bool { // For any particular bit index, the scriptlet is injectable if: diff --git a/tests/legacy_harness.rs b/tests/legacy_harness.rs index a11d5449..37cf5be8 100644 --- a/tests/legacy_harness.rs +++ b/tests/legacy_harness.rs @@ -330,7 +330,7 @@ mod legacy_check_match { let mut engine_deserialized = Engine::default(); // second empty engine_deserialized.use_tags(tags); { - let engine_serialized = engine.serialize().unwrap(); + let engine_serialized = engine.serialize().to_vec(); engine_deserialized.deserialize(&engine_serialized).unwrap(); // override from serialized copy } @@ -404,7 +404,7 @@ mod legacy_check_match { ); let mut engine_deserialized = Engine::default(); // second empty { - let engine_serialized = engine.serialize().unwrap(); + let engine_serialized = engine.serialize().to_vec(); engine_deserialized.deserialize(&engine_serialized).unwrap(); // override from serialized copy } @@ -898,7 +898,7 @@ mod legacy_misc_tests { false, ); // enable debugging and disable optimizations - let serialized = engine.serialize().unwrap(); + let serialized = engine.serialize().to_vec(); let mut engine2 = Engine::default(); engine2.deserialize(&serialized).unwrap(); diff --git a/tests/live.rs 
b/tests/live.rs index 8cfa7a8c..d6f1505b 100644 --- a/tests/live.rs +++ b/tests/live.rs @@ -283,11 +283,11 @@ fn check_live_redirects() { /// deserializing from it. fn stable_serialization_through_load() { let engine1 = Engine::from_filter_set(ALL_FILTERS.lock().unwrap().clone(), true); - let ser1 = engine1.serialize().unwrap(); + let ser1 = engine1.serialize().to_vec(); let mut engine2 = Engine::default(); engine2.deserialize(&ser1).unwrap(); - let ser2 = engine2.serialize().unwrap(); + let ser2 = engine2.serialize().to_vec(); assert_eq!(ser1, ser2); } diff --git a/tests/ublock-coverage.rs b/tests/ublock-coverage.rs index 2c3b406a..94aa7aca 100644 --- a/tests/ublock-coverage.rs +++ b/tests/ublock-coverage.rs @@ -174,7 +174,7 @@ fn check_specifics_default() { #[test] fn check_basic_works_after_deserialization() { let engine = get_blocker_engine(); - let serialized = engine.serialize().unwrap(); + let serialized = engine.serialize().to_vec(); let mut deserialized_engine = Engine::default(); deserialized_engine.deserialize(&serialized).unwrap(); diff --git a/tests/unit/engine.rs b/tests/unit/engine.rs index ae7b6cea..9cdfca11 100644 --- a/tests/unit/engine.rs +++ b/tests/unit/engine.rs @@ -153,10 +153,10 @@ mod tests { let mut engine = Engine::from_rules(filters, Default::default()); engine.enable_tags(&["stuff"]); engine.enable_tags(&["brian"]); - let serialized = engine.serialize().unwrap(); + let serialized = engine.serialize(); let mut deserialized_engine = Engine::default(); deserialized_engine.enable_tags(&["stuff"]); - deserialized_engine.deserialize(&serialized).unwrap(); + deserialized_engine.deserialize(serialized).unwrap(); url_results.into_iter().for_each(|(url, expected_result)| { let request = Request::new(url, "", "").unwrap(); @@ -182,8 +182,8 @@ mod tests { #[test] fn deserialization_generate_simple() { let mut engine = Engine::from_rules(["ad-banner"], Default::default()); - let data = engine.serialize().unwrap(); - const EXPECTED_HASH: u64 = 
14059407383857257100; + let data = engine.serialize().to_vec(); + const EXPECTED_HASH: u64 = 10824009878088122438; assert_eq!(hash(&data), EXPECTED_HASH, "{}", HASH_MISMATCH_MSG); engine.deserialize(&data).unwrap(); } @@ -192,8 +192,8 @@ mod tests { fn deserialization_generate_tags() { let mut engine = Engine::from_rules(["ad-banner$tag=abc"], Default::default()); engine.use_tags(&["abc"]); - let data = engine.serialize().unwrap(); - const EXPECTED_HASH: u64 = 1772924818985173219; + let data = engine.serialize().to_vec(); + const EXPECTED_HASH: u64 = 14013192499527032437; assert_eq!(hash(&data), EXPECTED_HASH, "{}", HASH_MISMATCH_MSG); engine.deserialize(&data).unwrap(); } @@ -207,7 +207,7 @@ mod tests { Resource::simple("noopcss", MimeType::TextCss, ""), ]); - let serialized = engine.serialize().unwrap(); + let serialized = engine.serialize().to_vec(); println!("Engine serialized: {:?}", serialized); engine.deserialize(&serialized).unwrap(); } @@ -216,12 +216,12 @@ mod tests { fn deserialization_brave_list() { let rules = rules_from_lists(&["data/brave/brave-main-list.txt"]); let mut engine = Engine::from_rules_parametrised(rules, Default::default(), false, true); - let data = engine.serialize().unwrap(); + let data = engine.serialize().to_vec(); - let expected_hash = if cfg!(feature = "css-validation") { - 12046041060659687422 + let expected_hash: u64 = if cfg!(feature = "css-validation") { + 15878451394701322592 } else { - 11420623023091203502 + 1052691436141678847 }; assert_eq!(hash(&data), expected_hash, "{}", HASH_MISMATCH_MSG);