Skip to content

Commit 2a70e4d

Browse files
authored
Merge pull request #483 from brave/v0.10-cleanup
v0.10 cleanup
2 parents 33cdf69 + b0a9e8f commit 2a70e4d

File tree

8 files changed

+51
-76
lines changed

8 files changed

+51
-76
lines changed

src/data_format/mod.rs

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ pub(crate) mod utils;
1111

1212
use crate::blocker::Blocker;
1313
use crate::cosmetic_filter_cache::CosmeticFilterCache;
14-
use crate::network_filter_list::FlatBufferParsingError;
14+
use crate::network_filter_list::NetworkFilterListParsingError;
1515

1616
/// Newer formats start with this magic byte sequence.
1717
/// Calculated as the leading 4 bytes of `echo -n 'brave/adblock-rust' | sha512sum`.
@@ -34,8 +34,8 @@ pub enum DeserializationError {
3434
RmpSerdeError(rmp_serde::decode::Error),
3535
UnsupportedFormatVersion(u8),
3636
NoHeaderFound,
37-
LegacyFormatNoLongerSupported, // not used, for backward compatibility
38-
FlatBufferParsingError(FlatBufferParsingError),
37+
FlatBufferParsingError(flatbuffers::InvalidFlatbuffer),
38+
ValidationError,
3939
}
4040

4141
impl From<std::convert::Infallible> for DeserializationError {
@@ -50,9 +50,14 @@ impl From<rmp_serde::decode::Error> for DeserializationError {
5050
}
5151
}
5252

53-
impl From<FlatBufferParsingError> for DeserializationError {
54-
fn from(e: FlatBufferParsingError) -> Self {
55-
Self::FlatBufferParsingError(e)
53+
impl From<NetworkFilterListParsingError> for DeserializationError {
54+
fn from(e: NetworkFilterListParsingError) -> Self {
55+
match e {
56+
NetworkFilterListParsingError::InvalidFlatbuffer(invalid_flatbuffer) => {
57+
Self::FlatBufferParsingError(invalid_flatbuffer)
58+
}
59+
NetworkFilterListParsingError::UniqueDomainsOutOfBounds(_) => Self::ValidationError,
60+
}
5661
}
5762
}
5863

src/engine.rs

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -250,8 +250,11 @@ impl Engine {
250250
crate::data_format::serialize_engine(&self.blocker, &self.cosmetic_cache)
251251
}
252252

253-
/// Deserialize the `Engine` from the binary format generated by `Engine::serialize`. The
254-
/// method will automatically select the correct deserialization implementation.
253+
/// Deserialize the `Engine` from the binary format generated by `Engine::serialize`.
254+
///
255+
/// Note that the binary format has a built-in version number that may be incremented. There is
256+
/// no guarantee that later versions of the format will be deserializable across minor versions
257+
/// of adblock-rust; the format is provided only as a caching optimization.
255258
pub fn deserialize(
256259
&mut self,
257260
serialized: &[u8],

src/filters/fb_network.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
//! Flatbuffer-compatible versions of [NetworkFilter] and related functionality.
2+
13
use std::collections::HashMap;
24
use std::vec;
35

@@ -18,6 +20,7 @@ use crate::utils::{Hash, ShortHash};
1820
pub mod flat;
1921
use flat::fb;
2022

23+
/// Builder for [NetworkFilterList].
2124
pub(crate) struct FlatNetworkFiltersListBuilder<'a> {
2225
builder: flatbuffers::FlatBufferBuilder<'a>,
2326
filters: Vec<WIPOffset<fb::NetworkFilter<'a>>>,
@@ -155,6 +158,8 @@ impl FlatNetworkFiltersListBuilder<'_> {
155158
VerifiedFlatFilterListMemory::from_builder(&self.builder)
156159
}
157160
}
161+
162+
/// A list of string parts that can be matched against a URL.
158163
pub(crate) struct FlatPatterns<'a> {
159164
patterns: Option<flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset<&'a str>>>,
160165
}
@@ -177,6 +182,7 @@ impl<'a> FlatPatterns<'a> {
177182
}
178183
}
179184

185+
/// Iterator over [FlatPatterns].
180186
pub(crate) struct FlatPatternsIterator<'a> {
181187
patterns: &'a FlatPatterns<'a>,
182188
len: usize,
@@ -206,6 +212,7 @@ impl ExactSizeIterator for FlatPatternsIterator<'_> {
206212
}
207213
}
208214

215+
/// Internal implementation of [NetworkFilter] that is compatible with flatbuffers.
209216
pub(crate) struct FlatNetworkFilter<'a> {
210217
key: u64,
211218
owner: &'a NetworkFilterList,

src/filters/flat_filter_map.rs

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
//! Holds the implementation of [FlatFilterMap].
2+
13
use flatbuffers::{Follow, ForwardsUOffset, Vector};
24
use std::cmp::PartialOrd;
35

@@ -8,7 +10,7 @@ pub(crate) struct FlatFilterMap<'a, I: PartialOrd + Copy, V> {
810
values: Vector<'a, ForwardsUOffset<V>>,
911
}
1012

11-
/// Iterator over NetworkFilter objects from FlatFilterMap
13+
/// Iterator over NetworkFilter objects from [FlatFilterMap]
1214
pub(crate) struct FlatFilterMapIterator<'a, I: PartialOrd + Copy, V> {
1315
current_index: usize,
1416
key: I,
@@ -39,9 +41,9 @@ where
3941
}
4042

4143
impl<'a, I: PartialOrd + Copy, V> FlatFilterMap<'a, I, V> {
42-
// Construct FlatFilterMap from two vectors:
43-
// - index: sorted array of keys
44-
// - values: array of values, same length as index
44+
/// Construct [FlatFilterMap] from two vectors:
45+
/// - index: sorted array of keys
46+
/// - values: array of values, same length as index
4547
pub fn new(index: &'a [I], values: Vector<'a, ForwardsUOffset<V>>) -> Self {
4648
// Sanity check the size are equal. Note: next() will handle |values| correctly.
4749
debug_assert!(index.len() == values.len());

src/filters/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ mod abstract_network;
44
mod network_matchers;
55

66
pub mod cosmetic;
7-
pub mod fb_network;
7+
pub(crate) mod fb_network;
88
pub(crate) mod flat_filter_map;
99
pub mod network;
1010
pub(crate) mod unsafe_tools;

src/network_filter_list.rs

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
//! Holds the implementation of [NetworkFilterList] and related functionality.
2+
13
use std::{collections::HashMap, collections::HashSet, fmt};
24

35
use crate::filters::fb_network::flat::fb;
@@ -12,6 +14,8 @@ use crate::regex_manager::RegexManager;
1214
use crate::request::Request;
1315
use crate::utils::{fast_hash, to_short_hash, Hash, ShortHash};
1416

17+
/// Holds relevant information from a single matchin gnetwork filter rule as a result of querying a
18+
/// [NetworkFilterList] for a given request.
1519
pub struct CheckResult {
1620
pub filter_mask: NetworkFilterMask,
1721
pub modifier_option: Option<String>,
@@ -46,11 +50,12 @@ impl NetworkFilterMaskHelper for CheckResult {
4650
}
4751

4852
#[derive(Debug, Clone)]
49-
pub enum FlatBufferParsingError {
53+
pub enum NetworkFilterListParsingError {
5054
InvalidFlatbuffer(flatbuffers::InvalidFlatbuffer),
5155
UniqueDomainsOutOfBounds(usize),
5256
}
5357

58+
/// Internal structure to keep track of a collection of network filters.
5459
pub(crate) struct NetworkFilterList {
5560
pub(crate) memory: VerifiedFlatFilterListMemory,
5661
pub(crate) unique_domains_hashes_map: HashMap<Hash, u32>,
@@ -68,19 +73,19 @@ impl Default for NetworkFilterList {
6873
}
6974

7075
impl NetworkFilterList {
71-
/// Create a new NetworkFilterList from raw memory (includes verification).
76+
/// Create a new [NetworkFilterList] from raw memory (includes verification).
7277
pub(crate) fn try_from_unverified_memory(
7378
flatbuffer_memory: Vec<u8>,
74-
) -> Result<NetworkFilterList, FlatBufferParsingError> {
79+
) -> Result<NetworkFilterList, NetworkFilterListParsingError> {
7580
let memory = VerifiedFlatFilterListMemory::from_raw(flatbuffer_memory)
76-
.map_err(FlatBufferParsingError::InvalidFlatbuffer)?;
81+
.map_err(NetworkFilterListParsingError::InvalidFlatbuffer)?;
7782

7883
Self::try_from_verified_memory(memory)
7984
}
8085

8186
pub(crate) fn try_from_verified_memory(
8287
memory: VerifiedFlatFilterListMemory,
83-
) -> Result<NetworkFilterList, FlatBufferParsingError> {
88+
) -> Result<NetworkFilterList, NetworkFilterListParsingError> {
8489
let root = memory.filter_list();
8590

8691
// Reconstruct the unique_domains_hashes_map from the flatbuffer data
@@ -91,7 +96,7 @@ impl NetworkFilterList {
9196
unique_domains_hashes_map.insert(
9297
hash,
9398
u32::try_from(index)
94-
.map_err(|_| FlatBufferParsingError::UniqueDomainsOutOfBounds(index))?,
99+
.map_err(|_| NetworkFilterListParsingError::UniqueDomainsOutOfBounds(index))?,
95100
);
96101
}
97102

@@ -185,6 +190,7 @@ impl NetworkFilterList {
185190

186191
Self::try_from_verified_memory(memory).unwrap_or_default()
187192
}
193+
188194
/// Returns the first found filter, if any, that matches the given request. The backing storage
189195
/// has a non-deterministic order, so this should be used for any category of filters where a
190196
/// match from each would be functionally equivalent. For example, if two different exception

tests/unit/engine.rs

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -173,17 +173,18 @@ mod tests {
173173
});
174174
}
175175

176-
const HASH_MISSMATCH_MSG: &str = r#"
177-
Changed in the serialized format detected!
178-
1. Update ADBLOCK_RUST_DAT_VERSION before updating the expectations
179-
2. DON'T rely on an approach: it's backwards compatible with the old one
176+
const HASH_MISMATCH_MSG: &str = r#"
177+
A change has been detected in the serialized format! If the change is intentional:
178+
1. Update ADBLOCK_RUST_DAT_VERSION before updating the expected hashes
179+
2. DON'T rely on backwards compatibility with the old format
180180
Backwards compatibility isn't covered by the tests"#;
181181

182182
#[test]
183183
fn deserialization_generate_simple() {
184184
let mut engine = Engine::from_rules(["ad-banner"], Default::default());
185185
let data = engine.serialize().unwrap();
186-
assert_eq!(hash(&data), 5723845290597955159, "{}", HASH_MISSMATCH_MSG);
186+
const EXPECTED_HASH: u64 = 5723845290597955159;
187+
assert_eq!(hash(&data), EXPECTED_HASH, "{}", HASH_MISMATCH_MSG);
187188
engine.deserialize(&data).unwrap();
188189
}
189190

@@ -192,7 +193,8 @@ mod tests {
192193
let mut engine = Engine::from_rules(["ad-banner$tag=abc"], Default::default());
193194
engine.use_tags(&["abc"]);
194195
let data = engine.serialize().unwrap();
195-
assert_eq!(hash(&data), 9626816743810307798, "{}", HASH_MISSMATCH_MSG);
196+
const EXPECTED_HASH: u64 = 9626816743810307798;
197+
assert_eq!(hash(&data), EXPECTED_HASH, "{}", HASH_MISMATCH_MSG);
196198
engine.deserialize(&data).unwrap();
197199
}
198200

@@ -222,7 +224,7 @@ mod tests {
222224
6839468684492187294
223225
};
224226

225-
assert_eq!(hash(&data), expected_hash, "{}", HASH_MISSMATCH_MSG);
227+
assert_eq!(hash(&data), expected_hash, "{}", HASH_MISMATCH_MSG);
226228

227229
engine.deserialize(&data).unwrap();
228230
}

tests/unit/filters/network.rs

Lines changed: 0 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -1174,56 +1174,6 @@ mod parse_tests {
11741174
}
11751175
}
11761176

1177-
#[test]
1178-
fn binary_serialization_works() {
1179-
use rmp_serde::{Deserializer, Serializer};
1180-
{
1181-
let filter =
1182-
NetworkFilter::parse("||foo.com/bar/baz$important", true, Default::default())
1183-
.unwrap();
1184-
1185-
let mut encoded = Vec::new();
1186-
filter
1187-
.serialize(&mut Serializer::new(&mut encoded))
1188-
.unwrap();
1189-
let mut de = Deserializer::new(&encoded[..]);
1190-
let decoded: NetworkFilter = Deserialize::deserialize(&mut de).unwrap();
1191-
1192-
let mut defaults = default_network_filter_breakdown();
1193-
defaults.hostname = Some(String::from("foo.com"));
1194-
defaults.filter = Some(String::from("/bar/baz"));
1195-
defaults.is_plain = true;
1196-
defaults.is_hostname_anchor = true;
1197-
defaults.is_important = true;
1198-
defaults.is_left_anchor = true;
1199-
assert_eq!(defaults, NetworkFilterBreakdown::from(&decoded))
1200-
}
1201-
{
1202-
let filter = NetworkFilter::parse("||foo.com*bar^", true, Default::default()).unwrap();
1203-
let mut defaults = default_network_filter_breakdown();
1204-
defaults.hostname = Some(String::from("foo.com"));
1205-
defaults.filter = Some(String::from("bar^"));
1206-
defaults.is_hostname_anchor = true;
1207-
defaults.is_regex = true;
1208-
defaults.is_plain = false;
1209-
1210-
let mut encoded = Vec::new();
1211-
filter
1212-
.serialize(&mut Serializer::new(&mut encoded))
1213-
.unwrap();
1214-
let mut de = Deserializer::new(&encoded[..]);
1215-
let decoded: NetworkFilter = Deserialize::deserialize(&mut de).unwrap();
1216-
1217-
assert_eq!(defaults, NetworkFilterBreakdown::from(&decoded));
1218-
assert!(RegexManager::default().matches(
1219-
decoded.mask,
1220-
decoded.filter.iter(),
1221-
(&decoded as *const NetworkFilter) as u64,
1222-
"bar/"
1223-
));
1224-
}
1225-
}
1226-
12271177
#[test]
12281178
fn parse_empty_host_anchor_exception() {
12291179
let filter_parsed =

0 commit comments

Comments
 (0)