Skip to content

Commit 45b0371

Browse files
authored
Flat containers (#498)
* Unsafe tools moved to flatbuffers. * Added FlatMultiMapView, FlatFilterMap replaced. * Added FlatSetView. * Removed unused utils. * Temporary marked FlatSet as dead_code. * Added test for FlatSetView. * Added FlatMultiMapView tests. * Clean up. * Ignore live test. * Clippy fixes. * Review issues are addressed. * Optional iterator for multimap (improve perf). * Removed unused file.
1 parent 0cb2d63 commit 45b0371

File tree

17 files changed

+379
-107
lines changed

17 files changed

+379
-107
lines changed

src/data_format/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ mod storage;
1010
pub(crate) mod utils;
1111

1212
use crate::cosmetic_filter_cache::CosmeticFilterCache;
13-
use crate::filters::unsafe_tools::VerifiedFlatbufferMemory;
13+
use crate::flatbuffers::unsafe_tools::VerifiedFlatbufferMemory;
1414
use crate::network_filter_list::NetworkFilterListParsingError;
1515

1616
/// Newer formats start with this magic byte sequence.

src/data_format/storage.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ use rmp_serde as rmps;
1010
use serde::{Deserialize, Serialize};
1111

1212
use crate::cosmetic_filter_cache::{CosmeticFilterCache, HostnameRuleDb, ProceduralOrActionFilter};
13-
use crate::filters::unsafe_tools::VerifiedFlatbufferMemory;
13+
use crate::flatbuffers::unsafe_tools::VerifiedFlatbufferMemory;
1414
use crate::utils::Hash;
1515

1616
use super::utils::{stabilize_hashmap_serialization, stabilize_hashset_serialization};

src/filters/fb_builder.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ use std::vec;
99
use flatbuffers::WIPOffset;
1010

1111
use crate::filters::network::{NetworkFilter, NetworkFilterMaskHelper};
12-
use crate::filters::unsafe_tools::VerifiedFlatbufferMemory;
12+
use crate::flatbuffers::unsafe_tools::VerifiedFlatbufferMemory;
1313
use crate::network_filter_list::token_histogram;
1414
use crate::optimizer;
1515
use crate::utils::{to_short_hash, Hash, ShortHash};

src/filters/fb_network.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ use std::collections::HashMap;
44

55
use crate::filters::fb_builder::FlatBufferBuilder;
66
use crate::filters::network::{NetworkFilterMask, NetworkFilterMaskHelper, NetworkMatchable};
7-
use crate::filters::unsafe_tools::{fb_vector_to_slice, VerifiedFlatbufferMemory};
7+
use crate::flatbuffers::unsafe_tools::{fb_vector_to_slice, VerifiedFlatbufferMemory};
88

99
use crate::regex_manager::RegexManager;
1010
use crate::request::Request;

src/filters/flat_filter_map.rs

Lines changed: 0 additions & 73 deletions
This file was deleted.

src/filters/mod.rs

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,4 @@ mod network_matchers;
66
pub mod cosmetic;
77
pub(crate) mod fb_builder;
88
pub(crate) mod fb_network;
9-
pub(crate) mod flat_filter_map;
109
pub mod network;
11-
pub(crate) mod unsafe_tools;
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
use std::marker::PhantomData;
2+
3+
use crate::flatbuffers::containers::sorted_index::SortedIndex;
4+
use flatbuffers::{Follow, Vector};
5+
6+
/// A map-like container that uses flatbuffer references.
7+
/// Provides O(log n) lookup time using binary search on the sorted index.
8+
/// `I` is the key type; `Keys` is the concrete container of keys: `&[I]` for fast indexing (u32, u64)
9+
/// and flatbuffers::Vector<I> if there is no conversion from Vector (str) to slice.
10+
pub(crate) struct FlatMultiMapView<'a, I: Ord, V, Keys>
11+
where
12+
Keys: SortedIndex<I>,
13+
V: Follow<'a>,
14+
{
15+
keys: Keys,
16+
values: Vector<'a, V>,
17+
_phantom: PhantomData<I>,
18+
}
19+
20+
impl<'a, I: Ord + Copy, V, Keys> FlatMultiMapView<'a, I, V, Keys>
21+
where
22+
Keys: SortedIndex<I> + Clone,
23+
V: Follow<'a>,
24+
{
25+
pub fn new(keys: Keys, values: Vector<'a, V>) -> Self {
26+
debug_assert!(keys.len() == values.len());
27+
28+
Self {
29+
keys,
30+
values,
31+
_phantom: PhantomData,
32+
}
33+
}
34+
35+
pub fn get(&self, key: I) -> Option<FlatMultiMapViewIterator<'a, I, V, Keys>> {
36+
let index = self.keys.partition_point(|x| *x < key);
37+
if index < self.keys.len() && self.keys.get(index) == key {
38+
Some(FlatMultiMapViewIterator {
39+
index,
40+
key,
41+
keys: self.keys.clone(), // Cloning is 3-4% faster than & in benchmarks
42+
values: self.values,
43+
})
44+
} else {
45+
None
46+
}
47+
}
48+
49+
#[cfg(test)]
50+
pub fn total_size(&self) -> usize {
51+
self.keys.len()
52+
}
53+
}
54+
55+
pub(crate) struct FlatMultiMapViewIterator<'a, I: Ord + Copy, V, Keys>
56+
where
57+
Keys: SortedIndex<I>,
58+
V: Follow<'a>,
59+
{
60+
index: usize,
61+
key: I,
62+
keys: Keys,
63+
values: Vector<'a, V>,
64+
}
65+
66+
impl<'a, I, V, Keys> Iterator for FlatMultiMapViewIterator<'a, I, V, Keys>
67+
where
68+
I: Ord + Copy,
69+
V: Follow<'a>,
70+
Keys: SortedIndex<I>,
71+
{
72+
type Item = (usize, <V as Follow<'a>>::Inner);
73+
74+
fn next(&mut self) -> Option<Self::Item> {
75+
if self.index < self.keys.len() && self.keys.get(self.index) == self.key {
76+
self.index += 1;
77+
Some((self.index - 1, self.values.get(self.index - 1)))
78+
} else {
79+
None
80+
}
81+
}
82+
}
83+
84+
#[cfg(test)]
85+
#[path = "../../../tests/unit/flatbuffers/containers/flat_multimap.rs"]
86+
mod unit_tests;
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
#![allow(dead_code)]
2+
3+
use std::marker::PhantomData;
4+
5+
use crate::flatbuffers::containers::sorted_index::SortedIndex;
6+
7+
/// A set-like container that uses flatbuffer references.
8+
/// Provides O(log n) lookup time using binary search on the sorted data.
9+
/// `I` is the key type; `Keys` is the concrete container of keys: `&[I]` for fast indexing (u32, u64)
10+
/// and flatbuffers::Vector<I> if there is no conversion from Vector (str) to slice.
11+
pub(crate) struct FlatSetView<I, Keys>
12+
where
13+
Keys: SortedIndex<I>,
14+
{
15+
keys: Keys,
16+
_phantom: PhantomData<I>,
17+
}
18+
19+
impl<I, Keys> FlatSetView<I, Keys>
20+
where
21+
I: Ord,
22+
Keys: SortedIndex<I>,
23+
{
24+
pub fn new(keys: Keys) -> Self {
25+
Self {
26+
keys,
27+
_phantom: PhantomData,
28+
}
29+
}
30+
31+
pub fn contains(&self, key: I) -> bool {
32+
let index = self.keys.partition_point(|x| *x < key);
33+
index < self.keys.len() && self.keys.get(index) == key
34+
}
35+
36+
#[inline(always)]
37+
pub fn len(&self) -> usize {
38+
self.keys.len()
39+
}
40+
41+
#[inline(always)]
42+
pub fn is_empty(&self) -> bool {
43+
self.len() == 0
44+
}
45+
}
46+
47+
#[cfg(test)]
48+
#[path = "../../../tests/unit/flatbuffers/containers/flat_set.rs"]
49+
mod unit_tests;

src/flatbuffers/containers/mod.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
pub(crate) mod flat_multimap;
2+
pub(crate) mod flat_set;
3+
pub(crate) mod sorted_index;
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
use flatbuffers::{Follow, Vector};
2+
3+
// Represents a sorted sequence on which binary search can be performed.
4+
pub(crate) trait SortedIndex<I> {
5+
fn len(&self) -> usize;
6+
fn get(&self, index: usize) -> I;
7+
fn partition_point<F>(&self, predicate: F) -> usize
8+
where
9+
F: FnMut(&I) -> bool;
10+
}
11+
12+
// Implementation for slices. Prefer using this with fb_vector_to_slice
13+
// if possible, because it faster than getting values with flatbuffer's
14+
// get method.
15+
impl<I: Ord + Copy> SortedIndex<I> for &[I] {
16+
#[inline(always)]
17+
fn len(&self) -> usize {
18+
<[I]>::len(self)
19+
}
20+
21+
#[inline(always)]
22+
fn get(&self, index: usize) -> I {
23+
self[index]
24+
}
25+
26+
#[inline(always)]
27+
fn partition_point<F>(&self, predicate: F) -> usize
28+
where
29+
F: FnMut(&I) -> bool,
30+
{
31+
debug_assert!(self.is_sorted());
32+
<[I]>::partition_point(self, predicate)
33+
}
34+
}
35+
36+
// General implementation for flatbuffers::Vector; it uses get to
37+
// obtain values.
38+
impl<'a, T: Follow<'a>> SortedIndex<T::Inner> for Vector<'a, T>
39+
where
40+
T::Inner: Ord,
41+
{
42+
#[inline(always)]
43+
fn len(&self) -> usize {
44+
Vector::len(self)
45+
}
46+
47+
#[inline(always)]
48+
fn get(&self, index: usize) -> T::Inner {
49+
Vector::get(self, index)
50+
}
51+
52+
fn partition_point<F>(&self, mut predicate: F) -> usize
53+
where
54+
F: FnMut(&T::Inner) -> bool,
55+
{
56+
debug_assert!(self.iter().is_sorted());
57+
58+
let mut left = 0;
59+
let mut right = self.len();
60+
61+
while left < right {
62+
let mid = left + (right - left) / 2;
63+
let value = self.get(mid);
64+
if predicate(&value) {
65+
left = mid + 1;
66+
} else {
67+
right = mid;
68+
}
69+
}
70+
71+
left
72+
}
73+
}

0 commit comments

Comments
 (0)