@@ -2,40 +2,94 @@ use std::fs::File;
22use std:: io:: { BufReader , Read } ;
33use std:: ops:: Range ;
44
/// All matches found by a search, together with the bytes surrounding them.
///
/// Context is collected in windows aligned to multiples of
/// `context_bytes_size` inside the buffer that was searched. Every window is
/// appended to `data` once, even when several matches fall inside it, and
/// every entry of `indexes` is a range into `data`.
#[derive(Debug, Clone)]
pub struct Matches {
    /// Position of every match, expressed as a range into `data`.
    indexes: Vec<Range<usize>>,
    /// Concatenation of every context window collected so far.
    data: Vec<u8>,
    /// One entry per window stored in `data`: the file position of the match
    /// that caused the window to be stored.
    offset: Vec<usize>,
    /// Length in bytes of the pattern being searched for.
    pattern_len: usize,
    /// Size in bytes of one context window.
    context_bytes_size: usize,
    /// File-relative range of the window most recently appended to `data`,
    /// used to avoid storing the same window twice.
    curr_context_bytes_indexes: Option<Range<usize>>,
}

impl Matches {
    /// Creates an empty collection for a pattern of `pattern_len` bytes whose
    /// context is gathered in windows of `context_bytes_size` bytes.
    pub fn new(pattern_len: usize, context_bytes_size: usize) -> Self {
        Self {
            pattern_len,
            context_bytes_size,
            indexes: Vec::new(),
            data: Vec::new(),
            offset: Vec::new(),
            curr_context_bytes_indexes: None,
        }
    }

    /// Get the file positions recorded for the stored context windows.
    pub fn offset(&self) -> &[usize] {
        &self.offset
    }

    /// Get the ranges, relative to [`Matches::data`], covered by each match.
    pub fn indexes(&self) -> &[Range<usize>] {
        &self.indexes
    }

    /// Get a reference to the collected context bytes.
    pub fn data(&self) -> &[u8] {
        &self.data
    }

    /// Records a match that starts at `index` inside `buffer`.
    ///
    /// `pos_in_file` is the file position of `buffer[0]`; it is used to turn
    /// buffer-relative positions into file-relative ones.
    ///
    /// # Panics
    ///
    /// Panics if `index` lies beyond the end of `buffer`.
    fn populate_matches(&mut self, index: usize, pos_in_file: usize, buffer: &[u8]) {
        // A window size of zero would make the alignment below divide by
        // zero; treat it as the smallest usable window instead.
        let window = self.context_bytes_size.max(1);

        // The searcher only reports where a match starts, so derive its end.
        let match_end = index + self.pattern_len;

        // Start of the aligned window that contains the first matching byte.
        let start = index - index % window;

        // Take as many whole windows as are needed to cover the entire match
        // (always at least one), but never read past the end of the buffer.
        let windows_needed = ((match_end - start + window - 1) / window).max(1);
        let end = (start + windows_needed * window).min(buffer.len());

        // Compare windows by their position in the file: the same
        // buffer-relative range in two different chunks is not the same data.
        let absolute = pos_in_file + start..pos_in_file + end;
        if self.curr_context_bytes_indexes.as_ref() != Some(&absolute) {
            // The context bytes plus the matching bytes, needed for printing
            // the result.
            self.data.extend_from_slice(&buffer[start..end]);

            // `index` is relative to the current buffer, but the stored
            // position has to be relative to the whole file.
            self.offset.push(index + pos_in_file);

            self.curr_context_bytes_indexes = Some(absolute);
        }

        // The window this match belongs to is always the last one in `data`,
        // so the match sits at its in-window position past that window's start.
        let window_start_in_data = self.data.len() - (end - start);
        let match_start_in_data = window_start_in_data + (index - start);

        self.indexes
            .push(match_start_in_data..match_start_in_data + self.pattern_len);
    }
}
3589
3690pub struct Searcher < ' a > {
3791 pattern : & ' a [ u8 ] ,
38- result : Vec < Vec < Match > > ,
92+ matches : Matches ,
3993 context_bytes_size : usize ,
4094}
4195
@@ -45,72 +99,45 @@ impl<'a> Searcher<'a> {
4599 pub fn new ( pattern : & ' a [ u8 ] , context_bytes_size : usize ) -> Self {
46100 Self {
47101 pattern,
48- result : Vec :: new ( ) ,
102+ matches : Matches :: new ( pattern . len ( ) , context_bytes_size ) ,
49103 context_bytes_size,
50104 }
51105 }
52106
53107 pub fn search_in_file ( & mut self , filepath : & str ) -> std:: io:: Result < ( ) > {
54108 let file = File :: open ( filepath) ?;
109+ let file_size = file. metadata ( ) . unwrap ( ) . len ( ) as usize ;
55110
56111 let mut reader = BufReader :: new ( file) ;
57- let mut buffer = [ 0 ; Self :: BUFFER_SIZE ] ;
58112 let mut pos_in_file = 0 ;
59113
60- loop {
61- let n = reader. read ( & mut buffer) . unwrap ( ) ;
114+ if file_size < self . context_bytes_size {
115+ self . context_bytes_size = file_size;
116+ }
62117
63- if n == 0 {
64- break ;
65- }
118+ if file_size <= Self :: BUFFER_SIZE {
119+ let mut buffer = Vec :: new ( ) ;
120+ reader . read_to_end ( & mut buffer ) ? ;
66121
67122 let result = Self :: search_slice ( & buffer, self . pattern ) ;
123+ for index in result {
124+ self . matches . populate_matches ( index, 0 , & buffer) ;
125+ }
126+ } else {
127+ let mut buffer = [ 0 ; Self :: BUFFER_SIZE ] ;
128+ loop {
129+ let n = reader. read ( & mut buffer) . unwrap ( ) ;
130+
131+ if n == 0 {
132+ break ;
133+ }
68134
69- if !result. is_empty ( ) {
70- // Convert the vector of indexes that match the pattern into Match objects
71- let result = result
72- . iter ( )
73- . map ( |& index| {
74- // index where we should start collecting bytes for context
75- let offset = index - ( index % self . context_bytes_size ) ;
76-
77- // search_for_slice only return the index where the match start so we need to
78- // create a range with all indexes from the match
79- let match_indexes = index..index + self . pattern . len ( ) ;
80-
81- // Creates the index range for the context bytes.
82- // this can contain all the indexes for the match or partially, depends on
83- // context_bytes_size and the pattern size
84- let mut context_bytes_indexes = offset..offset + self . context_bytes_size ;
85-
86- // In case context_bytes_size doesn't contain all of the match indexes we
87- // need to extend the end of the range
88- if context_bytes_indexes. end < match_indexes. end {
89- context_bytes_indexes. end += self . context_bytes_size ;
90- }
91-
92- // The actual bytes for context + the matching bytes
93- // only for printing the result
94- let context_bytes = buffer[ context_bytes_indexes] . to_vec ( ) ;
95-
96- // Now we need to know the indexes of the match inside of context_bytes
97- let mut match_indexes = match_indexes. start % context_bytes. len ( )
98- ..match_indexes. end % context_bytes. len ( ) ;
99-
100- if match_indexes. end == 0 {
101- match_indexes. end = self . context_bytes_size ;
102- }
103-
104- // The index is relative to the position in the current buffer we are
105- // reading from the file, but we need to store the position relative to the
106- // whole file
107- Match :: new ( index + pos_in_file, match_indexes, context_bytes)
108- } )
109- . collect ( ) ;
110-
111- self . result . push ( result) ;
135+ let result = Self :: search_slice ( & buffer, self . pattern ) ;
136+ for index in result {
137+ self . matches . populate_matches ( index, pos_in_file, & buffer) ;
138+ }
139+ pos_in_file += Self :: BUFFER_SIZE ;
112140 }
113- pos_in_file += Self :: BUFFER_SIZE ;
114141 }
115142
116143 Ok ( ( ) )
@@ -161,8 +188,8 @@ impl<'a> Searcher<'a> {
161188 }
162189
163190 /// Get a reference to the searcher's result.
164- pub fn result ( & self ) -> & [ Vec < Match > ] {
165- & self . result
191+ pub fn result ( & self ) -> & Matches {
192+ & self . matches
166193 }
167194
168195 /// Return the context bytes size.
0 commit comments