11use core:: ops:: {
22 Bound ,
3+ Range ,
34 RangeBounds ,
45} ;
56
@@ -14,6 +15,18 @@ use crate::{
1415 StaticStack ,
1516} ;
1617
/// Hint describing where the pattern might match the target.
///
/// Produced by a cheap pre-scan so the full matcher only has to be run
/// at promising offsets.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum MatchHint {
    /// A match hint could not be generated.
    Unsupported,

    /// The pattern will not match the target.
    NoMatches,

    /// The pattern may match the target at the given offset.
    MaybeMatch(usize),
}
29+
1730/// The `BinaryMatcher` is responsible for searching a [BinaryPattern] within a [MatchTarget].
1831///
1932/// Use [`BinaryMatcher::next_match`] to iterate through matches of the specified pattern.
@@ -199,7 +212,7 @@ impl<'a, S: Stack<u32>, C: Stack<usize>> BinaryMatcher<'a, S, C> {
199212 return Some ( data_cursor) ;
200213 }
201214 }
202-
215+
203216 self . save_stack . truncate ( save_stack_size) ;
204217 self . cursor_stack . truncate ( cursor_stack_size) ;
205218 if let Some ( data_cursor) = {
@@ -282,6 +295,66 @@ impl<'a, S: Stack<u32>, C: Stack<usize>> BinaryMatcher<'a, S, C> {
282295 Some ( data_cursor)
283296 }
284297
298+ /// Generate a match hint for a proper search based of the first matching bytes
299+ /// given by the pattern. This algorithm assumes that thet MatchTarget is in continuous memory.
300+ fn next_match_hint ( & self , range : Range < usize > ) -> MatchHint {
301+ let mut fs_buffer = [ 0u8 ; 0x10 ] ;
302+ let mut fs_buffer_len = 0 ;
303+ for atom in self . pattern_atoms {
304+ match atom {
305+ Atom :: ByteSequence { seq_start, seq_end } => {
306+ let seq_start = * seq_start as usize ;
307+ let seq_end = * seq_end as usize ;
308+
309+ let copy_length = ( seq_end - seq_start) . min ( fs_buffer. len ( ) - fs_buffer_len) ;
310+ fs_buffer[ fs_buffer_len..fs_buffer_len + copy_length] . copy_from_slice (
311+ & self . pattern_byte_sequence [ seq_start..seq_start + copy_length] ,
312+ ) ;
313+ fs_buffer_len += copy_length;
314+ if fs_buffer_len >= fs_buffer. len ( ) {
315+ /* quick search buffer filled */
316+ break ;
317+ }
318+ }
319+ Atom :: CursorPush => continue ,
320+ Atom :: SaveConstant ( _) => continue ,
321+ Atom :: SaveCursor => continue ,
322+ Atom :: Read ( _) => continue ,
323+ _ => break ,
324+ }
325+ }
326+
327+ if fs_buffer_len == 0 {
328+ /* can not berform a fuzzy search as we do not start with any binary data */
329+ return MatchHint :: Unsupported ;
330+ }
331+
332+ let Some ( target_buffer) = self . target . subrange ( range. start , range. end - range. start ) else {
333+ /* memory is not continuous */
334+ return MatchHint :: Unsupported ;
335+ } ;
336+
337+ Self :: fuzzy_search ( & fs_buffer[ 0 ..fs_buffer_len] , target_buffer)
338+ . map_or ( MatchHint :: NoMatches , |offset| {
339+ MatchHint :: MaybeMatch ( range. start + offset)
340+ } )
341+ }
342+
/// Locate the first occurrence of `needle` within `haystack`.
///
/// # Returns
/// The offset of the first position at which all bytes of `needle` equal the
/// corresponding bytes of `haystack`, or `None` if there is no such position
/// (including when `needle` is longer than `haystack`).
/// An empty `needle` matches at offset `0`.
fn fuzzy_search(needle: &[u8], haystack: &[u8]) -> Option<usize> {
    if needle.is_empty() {
        /* `windows(0)` would panic; an empty needle trivially matches at the start */
        return Some(0);
    }

    /*
     * `windows` yields every full-length window including the last one and
     * nothing at all if the needle is longer than the haystack. This avoids
     * both the length underflow and skipping a match at the very end.
     */
    haystack
        .windows(needle.len())
        .position(|window| window == needle)
}
357+
285358 /// Finds the next match for the associated [BinaryPattern] within the [MatchTarget].
286359 ///
287360 /// # Returns
@@ -296,24 +369,57 @@ impl<'a, S: Stack<u32>, C: Stack<usize>> BinaryMatcher<'a, S, C> {
296369 /// Finds the next match for the associated [BinaryPattern] within the [MatchTarget] within the given range.
297370 /// The current match offset will be clamped into the given range.
298371 pub fn next_match_within < R : RangeBounds < usize > > ( & mut self , range : R ) -> Option < & [ u32 ] > {
299- let range_min = match range. start_bound ( ) {
372+ let range_start = match range. start_bound ( ) {
300373 Bound :: Excluded ( value) => * value + 1 ,
301374 Bound :: Included ( value) => * value,
302375 Bound :: Unbounded => 0 ,
303376 } ;
304377
305- let range_max = match range. end_bound ( ) {
378+ let range_end = match range. end_bound ( ) {
306379 Bound :: Excluded ( value) => * value,
307380 Bound :: Included ( value) => * value + 1 ,
308381 Bound :: Unbounded => self . target . match_length ( ) ,
309382 } ;
310- if range_min >= range_max {
383+ if range_start >= range_end {
311384 /* nothing to match against */
312385 return None ;
313386 }
314387
315- let match_offset = self . match_offset . clamp ( range_min, range_max) ;
316- for match_offset in match_offset..range_max {
388+ let mut match_offset = self . match_offset . clamp ( range_start, range_end) ;
389+ while match_offset < range_end {
390+ match self . next_match_hint ( match_offset..range_end) {
391+ MatchHint :: Unsupported => {
392+ /* fall back to matching against every position */
393+ return self . next_match_within_loop ( match_offset..range_end) ;
394+ }
395+ MatchHint :: NoMatches => {
396+ /* no more matches */
397+ return None ;
398+ }
399+ MatchHint :: MaybeMatch ( hint_offset) => {
400+ /* check if the given offset is actually a match */
401+ self . save_stack . truncate ( 1 ) ;
402+ self . cursor_stack . truncate ( 0 ) ;
403+
404+ if self . match_atoms ( hint_offset, self . pattern_atoms ) . is_some ( ) {
405+ self . match_offset = hint_offset + 1 ;
406+
407+ let save_stack = self . save_stack . stack_mut ( ) ;
408+ save_stack[ 0 ] = hint_offset as u32 ;
409+ return Some ( save_stack) ;
410+ }
411+
412+ match_offset = hint_offset + 1 ;
413+ }
414+ }
415+ }
416+
417+ self . match_offset = range_end;
418+ None
419+ }
420+
421+ fn next_match_within_loop ( & mut self , range : Range < usize > ) -> Option < & [ u32 ] > {
422+ for match_offset in range. clone ( ) {
317423 self . save_stack . truncate ( 1 ) ;
318424 self . cursor_stack . truncate ( 0 ) ;
319425
@@ -328,7 +434,7 @@ impl<'a, S: Stack<u32>, C: Stack<usize>> BinaryMatcher<'a, S, C> {
328434 return Some ( save_stack) ;
329435 }
330436
331- self . match_offset = range_max ;
437+ self . match_offset = range . end ;
332438 None
333439 }
334440}
0 commit comments