1+ use std:: collections:: BTreeSet ;
12use std:: fs:: File ;
23use std:: io:: { BufReader , Read , Seek , SeekFrom } ;
3- use std:: ops:: Range ;
4+
5+ use super :: CustomRange ;
46
57#[ derive( Debug , Clone ) ]
68pub struct Matches {
7- indexes : Vec < Range < usize > > ,
9+ indexes : Vec < usize > ,
10+ context_bytes_indexes : BTreeSet < CustomRange > ,
811 data : Vec < u8 > ,
9- offset : Vec < usize > ,
10- pattern_len : usize ,
1112 context_bytes_size : usize ,
12- curr_context_bytes_indexes : Option < Range < usize > > ,
1313}
1414
1515impl Matches {
16- pub fn new ( pattern_len : usize , context_bytes_size : usize ) -> Self {
16+ pub fn new ( context_bytes_size : usize ) -> Self {
1717 Self {
18- pattern_len,
1918 context_bytes_size,
2019 indexes : Vec :: new ( ) ,
20+ context_bytes_indexes : BTreeSet :: new ( ) ,
2121 data : Vec :: new ( ) ,
22- offset : Vec :: new ( ) ,
23- curr_context_bytes_indexes : None ,
2422 }
2523 }
2624
27- pub fn offset ( & self ) -> & [ usize ] {
28- & self . offset
25+ pub fn context_bytes_indexes ( & self ) -> & BTreeSet < CustomRange > {
26+ & self . context_bytes_indexes
2927 }
3028
3129 /// Get a reference to the match's index.
32- pub fn indexes ( & self ) -> & [ Range < usize > ] {
30+ pub fn indexes ( & self ) -> & [ usize ] {
3331 & self . indexes
3432 }
3533
3634 /// Get a reference to the match's data.
37- pub fn data ( & self ) -> & [ u8 ] {
38- & self . data
35+ pub fn get_data ( & self , index : usize ) -> u8 {
36+ * self . data . get ( index) . unwrap ( )
37+ }
38+
39+ pub fn data_len ( & self ) -> usize {
40+ self . data . len ( )
3941 }
4042
4143 pub fn is_empty ( & self ) -> bool {
4244 self . data . is_empty ( ) && self . indexes . is_empty ( ) && self . indexes . is_empty ( )
4345 }
4446
45- fn populate_matches ( & mut self , index : usize , pos_in_file : usize , buffer : & [ u8 ] ) {
46- // index where we should start collecting bytes for context
47- let offset = index - ( index % self . context_bytes_size ) ;
48-
49- // search_for_slice only return the index where the match start so we need to
50- // create a range with all indexes from the match
51- let match_indexes = index..index + self . pattern_len ;
52-
53- // Creates the index range for the context bytes.
54- // context_bytes_size_indexes can contain all the indexes for the match or partially, depends on
55- // context_bytes_size and the pattern size
56- let mut context_bytes_indexes = if offset + self . context_bytes_size <= buffer. len ( ) {
57- offset..offset + self . context_bytes_size
58- } else {
59- offset..buffer. len ( )
60- } ;
61-
62- // In case context_bytes_size doesn't contain all of the match indexes we
63- // need to extend the end of the range
64- if context_bytes_indexes. end < match_indexes. end {
65- context_bytes_indexes. end += self . context_bytes_size ;
66- }
67-
68- let context_bytes_indexes = Some ( context_bytes_indexes) ;
69- if context_bytes_indexes != self . curr_context_bytes_indexes { // Check if context_bytes_indexes was already added
70- self . curr_context_bytes_indexes = context_bytes_indexes. clone ( ) ;
71-
72- // The actual bytes for context + the matching bytes
73- // needed for printing the result
74- self . data . extend_from_slice ( & buffer[ context_bytes_indexes. unwrap ( ) ] ) ;
75-
76- // The index is relative to the position in the current buffer we are
77- // reading from the file, but we need to store the position relative to the
78- // whole file
79- self . offset . push ( index + pos_in_file) ;
80- }
81-
82- // Now we need to know the indexes of the match inside of context_bytes
83- let mut match_indexes = match_indexes. start % self . data . len ( )
84- ..match_indexes. end % self . data . len ( ) ;
85-
86- if match_indexes. end < match_indexes. start {
87- match_indexes. end = match_indexes. start + self . pattern_len ;
47+ fn populate_matches ( & mut self , indexes : & [ usize ] , buffer : & [ u8 ] ) {
48+ for index in indexes {
49+ // index where we should start collecting bytes for context
50+ let offset = index - ( index % self . context_bytes_size ) ;
51+
52+ // Creates the index range for the context bytes.
53+ let context_bytes_indexes = if offset + self . context_bytes_size <= buffer. len ( ) {
54+ CustomRange :: new ( offset..offset + self . context_bytes_size )
55+ } else {
56+ CustomRange :: new ( offset..buffer. len ( ) )
57+ } ;
58+
59+ let bytes = & buffer[ context_bytes_indexes. range . start ..context_bytes_indexes. range . end ] ;
60+ if self . context_bytes_indexes . insert ( context_bytes_indexes) {
61+ // The actual bytes for context + the matching bytes
62+ // needed for printing the result
63+ self . data . extend_from_slice ( bytes) ;
64+ }
8865 }
8966
90- self . indexes . push ( match_indexes ) ;
67+ self . indexes . extend_from_slice ( indexes ) ;
9168 }
9269}
9370
@@ -104,7 +81,7 @@ impl<'a> Searcher<'a> {
10481 pub fn new ( pattern : & ' a [ u8 ] , context_bytes_size : usize , skip_bytes : u64 ) -> Self {
10582 Self {
10683 pattern,
107- matches : Matches :: new ( pattern . len ( ) , context_bytes_size) ,
84+ matches : Matches :: new ( context_bytes_size) ,
10885 context_bytes_size,
10986 skip_bytes,
11087 }
@@ -114,21 +91,19 @@ impl<'a> Searcher<'a> {
11491 let mut file = File :: open ( filepath) ?;
11592 let file_size = file. metadata ( ) . unwrap ( ) . len ( ) as usize ;
11693
117- let mut pos_in_file = file. seek ( SeekFrom :: Start ( self . skip_bytes ) ) . unwrap_or ( 0 ) as usize ;
94+ let _pos_in_file = file. seek ( SeekFrom :: Start ( self . skip_bytes ) ) . unwrap_or ( 0 ) as usize ;
11895 let mut reader = BufReader :: new ( file) ;
11996
12097 if file_size < self . context_bytes_size {
12198 self . context_bytes_size = file_size;
12299 }
123100
124101 if file_size <= Self :: BUFFER_SIZE {
125- let mut buffer = Vec :: new ( ) ;
102+ let mut buffer = Vec :: with_capacity ( Self :: BUFFER_SIZE ) ;
126103 reader. read_to_end ( & mut buffer) ?;
127104
128105 let result = Self :: search_slice ( & buffer, self . pattern ) ;
129- for index in result {
130- self . matches . populate_matches ( index, 0 , & buffer) ;
131- }
106+ self . matches . populate_matches ( & result, & buffer) ;
132107 } else {
133108 let mut buffer = [ 0 ; Self :: BUFFER_SIZE ] ;
134109 loop {
@@ -139,10 +114,9 @@ impl<'a> Searcher<'a> {
139114 }
140115
141116 let result = Self :: search_slice ( & buffer, self . pattern ) ;
142- for index in result {
143- self . matches . populate_matches ( index, pos_in_file, & buffer) ;
144- }
145- pos_in_file += Self :: BUFFER_SIZE ;
117+ self . matches . populate_matches ( & result, & buffer) ;
118+
119+ // pos_in_file += Self::BUFFER_SIZE;
146120 }
147121 }
148122
@@ -164,7 +138,9 @@ impl<'a> Searcher<'a> {
164138
165139 if slice[ curr_pos_pattern] == ch {
166140 if curr_pos_pattern == slice. len ( ) - 1 {
167- match_indexes. push ( i - curr_pos_pattern) ;
141+ let pos = i - curr_pos_pattern;
142+ match_indexes
143+ . extend_from_slice ( & ( pos..pos + slice. len ( ) ) . collect :: < Vec < usize > > ( ) ) ;
168144 curr_pos_pattern = table_of_ocurrencies[ curr_pos_pattern] ;
169145 } else {
170146 curr_pos_pattern += 1 ;
0 commit comments