Skip to content

Commit cef6094

Browse files
committed
Enable cosmetic filter benchmarks, add resources (#500)
* Enable cosmetic filter benchmarks, add resources
* Update the component name
1 parent 45b0371 commit cef6094

File tree

6 files changed

+98
-118
lines changed

6 files changed

+98
-118
lines changed

.github/scripts/run-benchmarks.sh

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,3 +16,6 @@ cargo bench --bench bench_rules blocker_new/brave-list -- --output-format benche
1616

1717
echo "Running memory usage benchmark..."
1818
cargo bench --bench bench_memory memory-usage -- --output-format bencher
19+
20+
echo "Running cosmetic matching benchmark..."
21+
cargo bench --bench bench_cosmetic_matching -- --output-format bencher

Cargo.toml

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -84,11 +84,9 @@ harness = false
8484
name = "bench_serialization"
8585
harness = false
8686

87-
# Currently disabled, as cosmetic filter internals
88-
# are no longer part of the crate's public API
89-
#[[bench]]
90-
#name = "bench_cosmetic_matching"
91-
#harness = false
87+
[[bench]]
88+
name = "bench_cosmetic_matching"
89+
harness = false
9290

9391
[features]
9492
# If disabling default features, consider explicitly re-enabling the

benches/bench_cosmetic_matching.rs

Lines changed: 49 additions & 94 deletions
Original file line numberDiff line numberDiff line change
@@ -1,46 +1,40 @@
1-
#![cfg(any())] // This attribute disables the entire module
1+
use adblock::Engine;
22
use criterion::*;
33

4-
use adblock::cosmetic_filter_cache::CosmeticFilterCache;
5-
use adblock::lists::{parse_filters, FilterFormat};
6-
74
#[path = "../tests/test_utils.rs"]
85
mod test_utils;
9-
use test_utils::rules_from_lists;
6+
7+
pub fn make_engine() -> Engine {
8+
use adblock::resources::Resource;
9+
10+
let rules = test_utils::rules_from_lists(&["data/brave/brave-main-list.txt"]);
11+
let resource_json = std::fs::read_to_string("data/brave/brave-resources.json").unwrap();
12+
let resource_list: Vec<Resource> = serde_json::from_str(&resource_json).unwrap();
13+
let mut engine = Engine::from_rules_parametrised(rules, Default::default(), true, true);
14+
engine.use_resources(resource_list);
15+
engine
16+
}
17+
18+
const TEST_URLS: [&str; 3] = [
19+
"https://search.brave.com/search?q=test",
20+
"https://mail.google.com/",
21+
"https://google.com",
22+
];
1023

1124
fn by_hostname(c: &mut Criterion) {
12-
let mut group = c.benchmark_group("cosmetic-hostname-match");
25+
let mut group = c.benchmark_group("url_cosmetic_resources");
1326

1427
group.throughput(Throughput::Elements(1));
1528
group.sample_size(20);
1629

17-
group.bench_function("easylist", move |b| {
18-
let rules = rules_from_lists(&["data/easylist.to/easylist/easylist.txt"]);
19-
let (_, cosmetic_filters) = parse_filters(&rules, false, FilterFormat::Standard);
20-
let cfcache = CosmeticFilterCache::from_rules(cosmetic_filters);
21-
b.iter(|| cfcache.hostname_cosmetic_resources("google.com"))
22-
});
23-
group.bench_function("many lists", move |b| {
24-
let rules = rules_from_lists(&[
25-
"data/easylist.to/easylist/easylist.txt",
26-
"data/easylist.to/easylistgermany/easylistgermany.txt",
27-
"data/uBlockOrigin/filters.txt",
28-
"data/uBlockOrigin/unbreak.txt",
29-
]);
30-
let (_, cosmetic_filters) = parse_filters(&rules, false, FilterFormat::Standard);
31-
let cfcache = CosmeticFilterCache::from_rules(cosmetic_filters);
32-
b.iter(|| cfcache.hostname_cosmetic_resources("google.com"))
33-
});
34-
group.bench_function("complex_hostname", move |b| {
35-
let rules = rules_from_lists(&[
36-
"data/easylist.to/easylist/easylist.txt",
37-
"data/easylist.to/easylistgermany/easylistgermany.txt",
38-
"data/uBlockOrigin/filters.txt",
39-
"data/uBlockOrigin/unbreak.txt",
40-
]);
41-
let (_, cosmetic_filters) = parse_filters(&rules, false, FilterFormat::Standard);
42-
let cfcache = CosmeticFilterCache::from_rules(cosmetic_filters);
43-
b.iter(|| cfcache.hostname_cosmetic_resources("ads.serve.1.domain.google.com"))
30+
group.bench_function("brave-list", move |b| {
31+
let engine = make_engine();
32+
b.iter(|| {
33+
TEST_URLS
34+
.iter()
35+
.map(|url| engine.url_cosmetic_resources(url))
36+
.collect::<Vec<_>>()
37+
})
4438
});
4539

4640
group.finish();
@@ -52,68 +46,29 @@ fn by_classes_ids(c: &mut Criterion) {
5246
group.throughput(Throughput::Elements(1));
5347
group.sample_size(20);
5448

55-
group.bench_function("easylist", move |b| {
56-
let rules = rules_from_lists(&["data/easylist.to/easylist/easylist.txt"]);
57-
let (_, cosmetic_filters) = parse_filters(&rules, false, FilterFormat::Standard);
58-
let cfcache = CosmeticFilterCache::from_rules(cosmetic_filters);
59-
let exceptions = Default::default();
60-
b.iter(|| cfcache.hidden_class_id_selectors(&["ad"], &["ad"], &exceptions))
61-
});
62-
group.bench_function("many lists", move |b| {
63-
let rules = rules_from_lists(&[
64-
"data/easylist.to/easylist/easylist.txt",
65-
"data/easylist.to/easylistgermany/easylistgermany.txt",
66-
"data/uBlockOrigin/filters.txt",
67-
"data/uBlockOrigin/unbreak.txt",
68-
]);
69-
let (_, cosmetic_filters) = parse_filters(&rules, false, FilterFormat::Standard);
70-
let cfcache = CosmeticFilterCache::from_rules(cosmetic_filters);
71-
let exceptions = Default::default();
72-
b.iter(|| cfcache.hidden_class_id_selectors(&["ad"], &["ad"], &exceptions))
73-
});
74-
group.bench_function("many matching classes and ids", move |b| {
75-
let rules = rules_from_lists(&[
76-
"data/easylist.to/easylist/easylist.txt",
77-
"data/easylist.to/easylistgermany/easylistgermany.txt",
78-
"data/uBlockOrigin/filters.txt",
79-
"data/uBlockOrigin/unbreak.txt",
80-
]);
81-
let (_, cosmetic_filters) = parse_filters(&rules, false, FilterFormat::Standard);
82-
let cfcache = CosmeticFilterCache::from_rules(cosmetic_filters);
83-
let exceptions = Default::default();
84-
let class_list = [
85-
"block-bg-advertisement-region-1",
86-
"photobox-adbox",
87-
"headerad-720",
88-
"rscontainer",
89-
"rail-article-sponsored",
90-
"fbPhotoSnowboxAds",
91-
"sidebar_ad_module",
92-
"ad-728x90_forum",
93-
"commercial-unit-desktop-rhs",
94-
"sponsored-editorial",
95-
"rr-300x600-ad",
96-
"adfoot",
97-
"lads",
98-
];
99-
let id_list = [
100-
"footer-adspace",
101-
"adsponsored_links_box",
102-
"lsadvert-top",
103-
"mn",
104-
"col-right-ad",
105-
"view_ads_bottom_bg_middle",
106-
"ad_468x60",
107-
"rightAdColumn",
108-
"content",
109-
"rhs_block",
110-
"center_col",
111-
"header",
112-
"advertisingModule160x600",
113-
];
114-
b.iter(|| cfcache.hidden_class_id_selectors(&class_list, &id_list, &exceptions))
115-
});
49+
let mut classes = vec![];
50+
let mut ids = vec![];
11651

52+
group.bench_function("brave-list", |b| {
53+
for i in 0..1000 {
54+
classes.push(format!("class{}", i));
55+
ids.push(format!("id{}", i));
56+
}
57+
58+
let engine = make_engine();
59+
let cases = TEST_URLS
60+
.iter()
61+
.map(|url| engine.url_cosmetic_resources(url).exceptions)
62+
.collect::<Vec<_>>();
63+
64+
let engine = make_engine();
65+
b.iter(|| {
66+
cases
67+
.iter()
68+
.map(|e| engine.hidden_class_id_selectors(&classes, &ids, e))
69+
.collect::<Vec<_>>()
70+
})
71+
});
11772
group.finish();
11873
}
11974

benches/bench_memory.rs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ use std::alloc::{GlobalAlloc, Layout, System};
99
use std::sync::atomic::{AtomicUsize, Ordering};
1010

1111
use adblock::request::Request;
12+
use adblock::resources::Resource;
1213
use adblock::Engine;
1314

1415
#[path = "../tests/test_utils.rs"]
@@ -147,7 +148,11 @@ fn bench_memory_usage(c: &mut Criterion) {
147148
for _ in 0..iters {
148149
ALLOCATOR.reset();
149150
let rules = rules_from_lists(&["data/brave/brave-main-list.txt"]);
150-
let engine = Engine::from_rules(rules, Default::default());
151+
let mut engine = Engine::from_rules(rules, Default::default());
152+
let resource_json =
153+
std::fs::read_to_string("data/brave/brave-resources.json").unwrap();
154+
let resource_list: Vec<Resource> = serde_json::from_str(&resource_json).unwrap();
155+
engine.use_resources(resource_list);
151156

152157
if run_requests {
153158
ALLOCATOR.reset();

data/brave/brave-resources.json

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

data/update-lists.js

Lines changed: 36 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
// A script to update the test lists and resources.
2+
// Use: BRAVE_SERVICE_KEY=<key> node data/update-lists.js <brave_list_version> <resource_list_version>
3+
14
const { execSync } = require("child_process");
25
const fs = require("fs");
36
const path = require("path");
@@ -7,17 +10,22 @@ const args = process.argv.slice(2);
710

811
if (args.length < 2) {
912
console.error(
10-
"Usage: node update-lists.js <Brave Services Key> <target version for brave list (i.e. 1.0.10268)>\n" +
11-
"The component name is 'Brave Ad Block Updater'"
13+
"Usage: BRAVE_SERVICE_KEY=<key> node update-lists.js <brave_list_version> <resource_list_version>\n" +
14+
"The component names are 'Brave Default Adblock Filters' and 'Brave Ad Block Resources Library'"
1215
);
1316
process.exit(1);
1417
}
1518

16-
const apiKey = args[0];
17-
const version = args[1];
19+
const apiKey = process.env["BRAVE_SERVICE_KEY"];
20+
if (!apiKey) {
21+
console.error("Error: BRAVE_SERVICE_KEY is not set");
22+
process.exit(1);
23+
}
24+
const braveVersionNumber = args[0].replace(/\./g, "_");
25+
const resourceVersionNumber = args[1].replace(/\./g, "_");
1826

19-
const versionNumber = version.replace(/\./g, "_");
20-
const extensionId = "iodkpdagapdfkphljnddpjlldadblomo";
27+
const braveMainListId = "iodkpdagapdfkphljnddpjlldadblomo";
28+
const braveResourceListId = "mfddibmblmbccpadfndgakiopmmhebop";
2129

2230
execSync(
2331
"curl -o data/easylist.to/easylist/easylist.txt https://easylist.to/easylist/easylist.txt"
@@ -31,7 +39,7 @@ execSync(
3139

3240
const rootDir = path.join(__dirname, "..");
3341
const tempDir = path.resolve(
34-
fs.mkdtempSync("temp-brave-list", {
42+
fs.mkdtempSync("temp-list", {
3543
dir: rootDir,
3644
})
3745
);
@@ -40,21 +48,31 @@ try {
4048
process.chdir(tempDir);
4149

4250
execSync(
43-
`curl -o extension.zip -H "BraveServiceKey: ${apiKey}" ` +
44-
`https://brave-core-ext.s3.brave.com/release/${extensionId}/extension_${versionNumber}.crx`
51+
`curl -o main_list.zip -H "BraveServiceKey: ${apiKey}" ` +
52+
`https://brave-core-ext.s3.brave.com/release/${braveMainListId}/extension_${braveVersionNumber}.crx`
53+
);
54+
55+
execSync(
56+
`curl -o resources.zip -H "BraveServiceKey: ${apiKey}" ` +
57+
`https://brave-core-ext.s3.brave.com/release/${braveResourceListId}/extension_${resourceVersionNumber}.crx`
4558
);
4659

47-
const listPath = path.join(tempDir, "list.txt");
48-
try {
49-
execSync("unzip extension.zip -d .");
50-
} catch (e) {
51-
// .crx is not a zip file, so we expect an error here.
52-
if (!fs.existsSync(listPath)) {
53-
throw new Error("Failed to find list.txt in extension.zip");
60+
61+
const takeFile = (zipFile, fileName, outputFileName) => {
62+
try {
63+
execSync(`unzip ${zipFile} -d .`);
64+
} catch (e) {
65+
// .crx is not a zip file, so we expect an error here.
66+
if (!fs.existsSync(fileName)) {
67+
throw new Error(`Failed to find ${fileName} in ${zipFile}`);
68+
}
5469
}
70+
fs.renameSync(fileName, path.join(rootDir, "data/brave", outputFileName));
5571
}
5672

57-
fs.renameSync(listPath, path.join(rootDir, "data/brave/brave-main-list.txt"));
73+
takeFile("main_list.zip", "list.txt", "brave-main-list.txt");
74+
takeFile("resources.zip", "resources.json", "brave-resources.json");
75+
5876
} finally {
59-
fs.rmdirSync(tempDir, { recursive: true });
77+
fs.rmSync(tempDir, { recursive: true });
6078
}

0 commit comments

Comments
 (0)