Skip to content

Commit 620e04b

Browse files
committed
feat: implementing a fuzzy search to find potential match targets to speed up matching on continuous memory targets by up to 7x
1 parent 7ba8b59 commit 620e04b

File tree

1 file changed

+113
-7
lines changed

1 file changed

+113
-7
lines changed

bmatcher-core/src/matcher.rs

Lines changed: 113 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
use core::ops::{
22
Bound,
3+
Range,
34
RangeBounds,
45
};
56

@@ -14,6 +15,18 @@ use crate::{
1415
StaticStack,
1516
};
1617

18+
/// Hint describing where the pattern might match the target.
///
/// Produced by the cheap literal-prefix scan in `next_match_hint`, which runs
/// before a full pattern match is attempted.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum MatchHint {
    /// A match hint could not be generated, either because the pattern does not
    /// start with literal bytes or because the requested part of the target is
    /// not available as a single slice.
    Unsupported,

    /// The pattern will not match the target: its leading bytes do not occur
    /// within the searched range.
    NoMatches,

    /// The pattern may match the target at the given offset (an offset into the
    /// target, not relative to the searched range). Only the leading bytes have
    /// been compared, so the offset must still be verified with a full match.
    MaybeMatch(usize),
}
29+
1730
/// The `BinaryMatcher` is responsible for searching a [BinaryPattern] within a [MatchTarget].
1831
///
1932
/// Use [`BinaryMatcher::next_match`] to iterate through matches of the specified pattern.
@@ -199,7 +212,7 @@ impl<'a, S: Stack<u32>, C: Stack<usize>> BinaryMatcher<'a, S, C> {
199212
return Some(data_cursor);
200213
}
201214
}
202-
215+
203216
self.save_stack.truncate(save_stack_size);
204217
self.cursor_stack.truncate(cursor_stack_size);
205218
if let Some(data_cursor) = {
@@ -282,6 +295,66 @@ impl<'a, S: Stack<u32>, C: Stack<usize>> BinaryMatcher<'a, S, C> {
282295
Some(data_cursor)
283296
}
284297

298+
/// Generate a match hint for a proper search based of the first matching bytes
299+
/// given by the pattern. This algorithm assumes that thet MatchTarget is in continuous memory.
300+
fn next_match_hint(&self, range: Range<usize>) -> MatchHint {
301+
let mut fs_buffer = [0u8; 0x10];
302+
let mut fs_buffer_len = 0;
303+
for atom in self.pattern_atoms {
304+
match atom {
305+
Atom::ByteSequence { seq_start, seq_end } => {
306+
let seq_start = *seq_start as usize;
307+
let seq_end = *seq_end as usize;
308+
309+
let copy_length = (seq_end - seq_start).min(fs_buffer.len() - fs_buffer_len);
310+
fs_buffer[fs_buffer_len..fs_buffer_len + copy_length].copy_from_slice(
311+
&self.pattern_byte_sequence[seq_start..seq_start + copy_length],
312+
);
313+
fs_buffer_len += copy_length;
314+
if fs_buffer_len >= fs_buffer.len() {
315+
/* quick search buffer filled */
316+
break;
317+
}
318+
}
319+
Atom::CursorPush => continue,
320+
Atom::SaveConstant(_) => continue,
321+
Atom::SaveCursor => continue,
322+
Atom::Read(_) => continue,
323+
_ => break,
324+
}
325+
}
326+
327+
if fs_buffer_len == 0 {
328+
/* can not berform a fuzzy search as we do not start with any binary data */
329+
return MatchHint::Unsupported;
330+
}
331+
332+
let Some(target_buffer) = self.target.subrange(range.start, range.end - range.start) else {
333+
/* memory is not continuous */
334+
return MatchHint::Unsupported;
335+
};
336+
337+
Self::fuzzy_search(&fs_buffer[0..fs_buffer_len], target_buffer)
338+
.map_or(MatchHint::NoMatches, |offset| {
339+
MatchHint::MaybeMatch(range.start + offset)
340+
})
341+
}
342+
343+
/// Finds the first occurrence of `needle` within `haystack`.
///
/// # Returns
/// The offset of the first position at which `haystack` contains all bytes of
/// `needle`, or `None` if there is no such position (including when `haystack`
/// is shorter than `needle`). An empty `needle` matches at offset `0`.
fn fuzzy_search(needle: &[u8], haystack: &[u8]) -> Option<usize> {
    if needle.is_empty() {
        /* `windows(0)` would panic; an empty needle matches right at the start */
        return Some(0);
    }

    /*
     * `windows` yields nothing when the haystack is shorter than the needle and
     * it includes the final window, so a needle located at the very end of the
     * haystack is found as well.
     */
    haystack
        .windows(needle.len())
        .position(|window| window == needle)
}
357+
285358
/// Finds the next match for the associated [BinaryPattern] within the [MatchTarget].
286359
///
287360
/// # Returns
@@ -296,24 +369,57 @@ impl<'a, S: Stack<u32>, C: Stack<usize>> BinaryMatcher<'a, S, C> {
296369
/// Finds the next match for the associated [BinaryPattern] within the [MatchTarget] within the given range.
297370
/// The current match offset will be clamped into the given range.
298371
pub fn next_match_within<R: RangeBounds<usize>>(&mut self, range: R) -> Option<&[u32]> {
299-
let range_min = match range.start_bound() {
372+
let range_start = match range.start_bound() {
300373
Bound::Excluded(value) => *value + 1,
301374
Bound::Included(value) => *value,
302375
Bound::Unbounded => 0,
303376
};
304377

305-
let range_max = match range.end_bound() {
378+
let range_end = match range.end_bound() {
306379
Bound::Excluded(value) => *value,
307380
Bound::Included(value) => *value + 1,
308381
Bound::Unbounded => self.target.match_length(),
309382
};
310-
if range_min >= range_max {
383+
if range_start >= range_end {
311384
/* nothing to match against */
312385
return None;
313386
}
314387

315-
let match_offset = self.match_offset.clamp(range_min, range_max);
316-
for match_offset in match_offset..range_max {
388+
let mut match_offset = self.match_offset.clamp(range_start, range_end);
389+
while match_offset < range_end {
390+
match self.next_match_hint(match_offset..range_end) {
391+
MatchHint::Unsupported => {
392+
/* fall back to matching against every position */
393+
return self.next_match_within_loop(match_offset..range_end);
394+
}
395+
MatchHint::NoMatches => {
396+
/* no more matches */
397+
return None;
398+
}
399+
MatchHint::MaybeMatch(hint_offset) => {
400+
/* check if the given offset is actually a match */
401+
self.save_stack.truncate(1);
402+
self.cursor_stack.truncate(0);
403+
404+
if self.match_atoms(hint_offset, self.pattern_atoms).is_some() {
405+
self.match_offset = hint_offset + 1;
406+
407+
let save_stack = self.save_stack.stack_mut();
408+
save_stack[0] = hint_offset as u32;
409+
return Some(save_stack);
410+
}
411+
412+
match_offset = hint_offset + 1;
413+
}
414+
}
415+
}
416+
417+
self.match_offset = range_end;
418+
None
419+
}
420+
421+
fn next_match_within_loop(&mut self, range: Range<usize>) -> Option<&[u32]> {
422+
for match_offset in range.clone() {
317423
self.save_stack.truncate(1);
318424
self.cursor_stack.truncate(0);
319425

@@ -328,7 +434,7 @@ impl<'a, S: Stack<u32>, C: Stack<usize>> BinaryMatcher<'a, S, C> {
328434
return Some(save_stack);
329435
}
330436

331-
self.match_offset = range_max;
437+
self.match_offset = range.end;
332438
None
333439
}
334440
}

0 commit comments

Comments
 (0)