Skip to content

Commit ecd3adb

Browse files
authored
Use the one flatbuffer to store all lists [0.11.x] (#493)
This PR moves from per-NetworkList flatbuffers to a single per-Engine flatbuffer. It doesn't affect the performance metrics, but it opens up the possibility of storing cosmetic filters in the same flatbuffer. It also simplifies the serialization/deserialization code.
1 parent 4738d3f commit ecd3adb

22 files changed

+852
-765
lines changed

Cargo.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -93,9 +93,9 @@ harness = false
9393
[features]
9494
# If disabling default features, consider explicitly re-enabling the
9595
# "embedded-domain-resolver" feature.
96-
default = ["embedded-domain-resolver", "full-regex-handling", "unsync-regex-caching"]
96+
default = ["embedded-domain-resolver", "full-regex-handling", "single-thread"]
9797
full-regex-handling = []
98-
unsync-regex-caching = [] # disables `Send` and `Sync` on `Engine`.
98+
single-thread = [] # disables `Send` and `Sync` on `Engine`.
9999
regex-debug-info = []
100100
css-validation = ["cssparser", "selectors"]
101101
content-blocking = []

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ By default, `adblock-rust` ships with a built-in domain resolution implementatio
5252
`adblock-rust` uses uBlock Origin-compatible resources for scriptlet injection and redirect rules.
5353
The `resource-assembler` feature allows `adblock-rust` to parse these resources directly from the file formats used by the uBlock Origin repository.
5454

55-
#### Thread safety (`unsync-regex-caching`)
55+
#### Thread safety (`single-thread`)
5656

57-
The `unsync-regex-caching` feature enables optimizations for rule matching speed and the amount of memory used by the engine.
57+
The `single-thread` feature enables optimizations for rule matching speed and the amount of memory used by the engine.
5858
This feature can be disabled to make the engine `Send + Sync`, although it is recommended to only access the engine on a single thread to maintain optimal performance.

benches/bench_matching.rs

Lines changed: 14 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,9 @@ use criterion::*;
22

33
use serde::{Deserialize, Serialize};
44

5-
use adblock::blocker::{Blocker, BlockerOptions};
65
use adblock::request::Request;
7-
use adblock::resources::ResourceStorage;
86
use adblock::url_parser::parse_url;
9-
use adblock::Engine;
7+
use adblock::{Engine, FilterSet};
108

119
#[path = "../tests/test_utils.rs"]
1210
mod test_utils;
@@ -36,14 +34,13 @@ fn load_requests() -> Vec<TestRequest> {
3634
reqs
3735
}
3836

39-
fn get_blocker(rules: impl IntoIterator<Item = impl AsRef<str>>) -> Blocker {
37+
fn get_engine(rules: impl IntoIterator<Item = impl AsRef<str>>) -> Engine {
4038
let (network_filters, _) = adblock::lists::parse_filters(rules, false, Default::default());
4139

42-
let blocker_options = BlockerOptions {
43-
enable_optimizations: true,
44-
};
45-
46-
Blocker::new(network_filters, &blocker_options)
40+
Engine::from_filter_set(
41+
FilterSet::new_with_rules(network_filters, vec![], false),
42+
true,
43+
)
4744
}
4845

4946
fn bench_rule_matching(engine: &Engine, requests: &[TestRequest]) -> (u32, u32) {
@@ -61,15 +58,11 @@ fn bench_rule_matching(engine: &Engine, requests: &[TestRequest]) -> (u32, u32)
6158
(matches, passes)
6259
}
6360

64-
fn bench_matching_only(
65-
blocker: &Blocker,
66-
resources: &ResourceStorage,
67-
requests: &[Request],
68-
) -> (u32, u32) {
61+
fn bench_matching_only(engine: &Engine, requests: &[Request]) -> (u32, u32) {
6962
let mut matches = 0;
7063
let mut passes = 0;
7164
requests.iter().for_each(|parsed| {
72-
let check = blocker.check(parsed, resources);
65+
let check = engine.check_network_request(parsed);
7366
if check.matched {
7467
matches += 1;
7568
} else {
@@ -150,14 +143,13 @@ fn rule_match_parsed_el(c: &mut Criterion) {
150143
.filter_map(Result::ok)
151144
.collect();
152145
let requests_len = requests_parsed.len() as u64;
153-
let blocker = get_blocker(rules);
154-
let resources = ResourceStorage::default();
146+
let engine = get_engine(rules);
155147

156148
group.throughput(Throughput::Elements(requests_len));
157149
group.sample_size(10);
158150

159151
group.bench_function("easylist", move |b| {
160-
b.iter(|| bench_matching_only(&blocker, &resources, &requests_parsed))
152+
b.iter(|| bench_matching_only(&engine, &requests_parsed))
161153
});
162154

163155
group.finish();
@@ -170,8 +162,7 @@ fn rule_match_parsed_elep_slimlist(c: &mut Criterion) {
170162
"data/easylist.to/easylist/easylist.txt",
171163
"data/easylist.to/easylist/easyprivacy.txt",
172164
]);
173-
let blocker = get_blocker(full_rules);
174-
let resources = ResourceStorage::default();
165+
let engine = get_engine(full_rules);
175166

176167
let requests = load_requests();
177168
let requests_parsed: Vec<_> = requests
@@ -182,7 +173,7 @@ fn rule_match_parsed_elep_slimlist(c: &mut Criterion) {
182173
let requests_len = requests_parsed.len() as u64;
183174

184175
let slim_rules = rules_from_lists(&["data/slim-list.txt"]);
185-
let slim_blocker = get_blocker(slim_rules);
176+
let slim_engine = get_engine(slim_rules);
186177

187178
let requests_copy = load_requests();
188179
let requests_parsed_copy: Vec<_> = requests_copy
@@ -195,11 +186,10 @@ fn rule_match_parsed_elep_slimlist(c: &mut Criterion) {
195186
group.sample_size(10);
196187

197188
group.bench_function("el+ep", move |b| {
198-
b.iter(|| bench_matching_only(&blocker, &resources, &requests_parsed))
189+
b.iter(|| bench_matching_only(&engine, &requests_parsed))
199190
});
200-
let resources = ResourceStorage::default();
201191
group.bench_function("slimlist", move |b| {
202-
b.iter(|| bench_matching_only(&slim_blocker, &resources, &requests_parsed_copy))
192+
b.iter(|| bench_matching_only(&slim_engine, &requests_parsed_copy))
203193
});
204194

205195
group.finish();

benches/bench_redirect_performance.rs

Lines changed: 16 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
1+
use adblock::{Engine, FilterSet};
12
use criterion::*;
23
use tokio::runtime::Runtime;
34

4-
use adblock::blocker::{Blocker, BlockerOptions};
55
use adblock::filters::network::{NetworkFilter, NetworkFilterMask, NetworkFilterMaskHelper};
66
use adblock::request::Request;
7-
use adblock::resources::ResourceStorage;
7+
use adblock::resources::Resource;
88

99
const DEFAULT_LISTS_URL: &str =
1010
"https://raw.githubusercontent.com/brave/adblock-resources/master/filter_lists/list_catalog.json";
@@ -84,18 +84,13 @@ fn get_redirect_rules() -> Vec<NetworkFilter> {
8484
.collect()
8585
}
8686

87-
/// Loads the supplied rules, and the test set of resources, into a Blocker
88-
fn get_preloaded_blocker(rules: Vec<NetworkFilter>) -> Blocker {
89-
let blocker_options = BlockerOptions {
90-
enable_optimizations: true,
91-
};
92-
93-
Blocker::new(rules, &blocker_options)
87+
/// Loads the supplied rules, and the test set of resources, into a Engine
88+
fn get_preloaded_engine(rules: Vec<NetworkFilter>) -> Engine {
89+
let filter_set = FilterSet::new_with_rules(rules, vec![], false);
90+
Engine::from_filter_set(filter_set, true /* optimize */)
9491
}
9592

96-
fn build_resources_for_filters(#[allow(unused)] filters: &[NetworkFilter]) -> ResourceStorage {
97-
let mut resources = ResourceStorage::default();
98-
93+
fn get_resources_for_filters(#[allow(unused)] filters: &[NetworkFilter]) -> Vec<Resource> {
9994
#[cfg(feature = "resource-assembler")]
10095
{
10196
use adblock::resources::resource_assembler::assemble_web_accessible_resources;
@@ -111,10 +106,7 @@ fn build_resources_for_filters(#[allow(unused)] filters: &[NetworkFilter]) -> Re
111106
"data/test/fake-uBO-files/scriptlets.js",
112107
)),
113108
);
114-
115-
resource_data.into_iter().for_each(|resource| {
116-
let _res = resources.add_resource(resource);
117-
});
109+
resource_data
118110
}
119111

120112
#[cfg(not(feature = "resource-assembler"))]
@@ -141,12 +133,8 @@ fn build_resources_for_filters(#[allow(unused)] filters: &[NetworkFilter]) -> Re
141133
permission: Default::default(),
142134
}
143135
})
144-
.for_each(|resource| {
145-
let _res = resources.add_resource(resource);
146-
});
136+
.collect()
147137
}
148-
149-
resources
150138
}
151139

152140
/// Maps network filter rules into `Request`s that would trigger those rules
@@ -211,9 +199,9 @@ pub fn build_custom_requests(rules: Vec<NetworkFilter>) -> Vec<Request> {
211199
.collect::<Vec<_>>()
212200
}
213201

214-
fn bench_fn(blocker: &Blocker, resources: &ResourceStorage, requests: &[Request]) {
202+
fn bench_fn(engine: &Engine, requests: &[Request]) {
215203
requests.iter().for_each(|request| {
216-
let block_result = blocker.check(request, resources);
204+
let block_result = engine.check_network_request(request);
217205
assert!(
218206
block_result.redirect.is_some(),
219207
"{:?}, {:?}",
@@ -228,16 +216,18 @@ fn redirect_performance(c: &mut Criterion) {
228216

229217
let rules = get_redirect_rules();
230218

231-
let blocker = get_preloaded_blocker(rules.clone());
232-
let resources = build_resources_for_filters(&rules);
219+
let mut engine = get_preloaded_engine(rules.clone());
220+
let resources = get_resources_for_filters(&rules);
221+
engine.use_resources(resources);
222+
233223
let requests = build_custom_requests(rules.clone());
234224
let requests_len = requests.len() as u64;
235225

236226
group.throughput(Throughput::Elements(requests_len));
237227
group.sample_size(10);
238228

239229
group.bench_function("without_alias_lookup", move |b| {
240-
b.iter(|| bench_fn(&blocker, &resources, &requests))
230+
b.iter(|| bench_fn(&engine, &requests))
241231
});
242232

243233
group.finish();

benches/bench_rules.rs

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
11
use criterion::*;
22
use once_cell::sync::Lazy;
33

4-
use adblock::blocker::{Blocker, BlockerOptions};
5-
use adblock::Engine;
4+
use adblock::{Engine, FilterSet};
65

76
#[path = "../tests/test_utils.rs"]
87
mod test_utils;
@@ -79,14 +78,13 @@ fn list_parse(c: &mut Criterion) {
7978
group.finish();
8079
}
8180

82-
fn get_blocker(rules: impl IntoIterator<Item = impl AsRef<str>>) -> Blocker {
81+
fn get_engine(rules: impl IntoIterator<Item = impl AsRef<str>>) -> Engine {
8382
let (network_filters, _) = adblock::lists::parse_filters(rules, false, Default::default());
8483

85-
let blocker_options = BlockerOptions {
86-
enable_optimizations: true,
87-
};
88-
89-
Blocker::new(network_filters, &blocker_options)
84+
Engine::from_filter_set(
85+
FilterSet::new_with_rules(network_filters, vec![], false),
86+
true,
87+
)
9088
}
9189

9290
fn blocker_new(c: &mut Criterion) {
@@ -104,9 +102,9 @@ fn blocker_new(c: &mut Criterion) {
104102
let engine = Engine::from_rules(&brave_list_rules, Default::default());
105103
let engine_serialized = engine.serialize().unwrap();
106104

107-
group.bench_function("el+ep", move |b| b.iter(|| get_blocker(&easylist_rules)));
105+
group.bench_function("el+ep", move |b| b.iter(|| get_engine(&easylist_rules)));
108106
group.bench_function("brave-list", move |b| {
109-
b.iter(|| get_blocker(&brave_list_rules))
107+
b.iter(|| get_engine(&brave_list_rules))
110108
});
111109
group.bench_function("brave-list-deserialize", move |b| {
112110
b.iter(|| {

0 commit comments

Comments
 (0)