//! This module provides functionality to match patterns in strings
//! and to compute the Z-array of a given input string.
3+
/// Extends a partially known Z-value by comparing the input against the pattern.
///
/// Starting from an already established match length `z_value`, the elements
/// `input_string[start_index + z_value..]` are compared pairwise with
/// `pattern[z_value..]` until the first mismatch or until either slice runs out.
/// The result therefore never exceeds the pattern length (unless the supplied
/// `z_value` already did, in which case it is returned unchanged).
///
/// # Parameters
/// - `input_string`: A slice of elements that represents the input string.
/// - `pattern`: A slice of elements representing the pattern to match.
/// - `start_index`: The index in the input string where the candidate match begins.
/// - `z_value`: The number of leading elements already known to match.
///
/// # Returns
/// The length of the match between the input, starting at `start_index`,
/// and the prefix of the pattern.
fn calculate_z_value<T: Eq>(
    input_string: &[T],
    pattern: &[T],
    start_index: usize,
    z_value: usize,
) -> usize {
    // `get` yields `None` when the offset lies past the end of a slice; treating
    // that as an empty remainder means there is simply nothing left to compare.
    let remaining_input = input_string.get(start_index + z_value..).unwrap_or(&[]);
    let remaining_pattern = pattern.get(z_value..).unwrap_or(&[]);

    let extension = remaining_input
        .iter()
        .zip(remaining_pattern)
        .take_while(|(input_item, pattern_item)| input_item == pattern_item)
        .count();

    z_value + extension
}
32+
/// Derives a starting Z-value for index `i` from a previously found match.
///
/// When `i` lies inside a match that started at `last_match` and whose last
/// matched element is at `match_end`, the value stored at the mirrored position
/// `i - last_match` can be reused, limited to the number of elements that are
/// still inside that match (`match_end - i + 1`). The slice is only read; the
/// caller is responsible for storing the returned value.
///
/// # Parameters
/// - `z_array`: The Z-values computed so far, used as the lookup table.
/// - `i`: The current index in the input string.
/// - `match_end`: The index of the last element covered by the previous match.
/// - `last_match`: The index at which the previous match started.
///
/// # Returns
/// The initial Z-value for the current index.
///
/// # Panics
/// Panics if `i - last_match` is not a valid index into `z_array`. The caller
/// must ensure `last_match <= i <= match_end`.
fn initialize_z_array_from_previous_match(
    z_array: &[usize],
    i: usize,
    match_end: usize,
    last_match: usize,
) -> usize {
    debug_assert!(
        last_match <= i && i <= match_end,
        "index must lie inside the previous match"
    );

    let mirrored_value = z_array[i - last_match];
    let remaining_in_match = match_end - i + 1;
    mirrored_value.min(remaining_in_match)
}
52+
/// Collects every position whose Z-value equals the pattern length.
///
/// A Z-value equal to `pattern_size` means the whole pattern matched at
/// that position.
///
/// # Parameters
/// - `z_array`: A slice of the Z-array containing computed Z-values.
/// - `pattern_size`: The length of the pattern that was searched for.
///
/// # Returns
/// A vector with the starting indices of all full matches, in ascending order.
fn find_full_matches(z_array: &[usize], pattern_size: usize) -> Vec<usize> {
    let mut match_starts = Vec::new();
    for (position, &matched_length) in z_array.iter().enumerate() {
        if matched_length == pattern_size {
            match_starts.push(position);
        }
    }
    match_starts
}
69+
70+ /// Matches the occurrences of a pattern in an input string starting
71+ /// from a specified index.
72+ ///
73+ /// # Parameters
74+ /// - `input_string`: A slice of elements to search within.
75+ /// - `pattern`: A slice of elements that represents the pattern to match.
76+ /// - `start_index`: The index in the input string to start the search.
77+ /// - `only_full_matches`: If true, only full matches of the pattern will be returned.
78+ ///
79+ /// # Returns
80+ /// A vector containing the starting indices of the matches.
181fn match_with_z_array < T : Eq > (
282 input_string : & [ T ] ,
383 pattern : & [ T ] ,
@@ -8,41 +88,54 @@ fn match_with_z_array<T: Eq>(
888 let pattern_size = pattern. len ( ) ;
989 let mut last_match: usize = 0 ;
1090 let mut match_end: usize = 0 ;
11- let mut array = vec ! [ 0usize ; size] ;
91+ let mut z_array = vec ! [ 0usize ; size] ;
92+
1293 for i in start_index..size {
13- // getting plain z array of a string requires matching from index
14- // 1 instead of 0 (which gives a trivial result instead)
1594 if i <= match_end {
16- array[ i] = std:: cmp:: min ( array[ i - last_match] , match_end - i + 1 ) ;
17- }
18- while ( i + array[ i] ) < size && array[ i] < pattern_size {
19- if input_string[ i + array[ i] ] != pattern[ array[ i] ] {
20- break ;
21- }
22- array[ i] += 1 ;
95+ z_array[ i] =
96+ initialize_z_array_from_previous_match ( & mut z_array, i, match_end, last_match) ;
2397 }
24- if ( i + array[ i] ) > ( match_end + 1 ) {
25- match_end = i + array[ i] - 1 ;
98+
99+ z_array[ i] = calculate_z_value ( input_string, pattern, i, z_array[ i] ) ;
100+
101+ if i + z_array[ i] > match_end + 1 {
102+ match_end = i + z_array[ i] - 1 ;
26103 last_match = i;
27104 }
28105 }
106+
29107 if !only_full_matches {
30- array
108+ z_array
31109 } else {
32- let mut answer: Vec < usize > = vec ! [ ] ;
33- for ( idx, number) in array. iter ( ) . enumerate ( ) {
34- if * number == pattern_size {
35- answer. push ( idx) ;
36- }
37- }
38- answer
110+ find_full_matches ( & z_array, pattern_size)
39111 }
40112}
41113
114+ /// Constructs the Z-array for the given input string.
115+ ///
116+ /// The Z-array is an array where the i-th element is the length of the longest
117+ /// substring starting from s[i] that is also a prefix of s.
118+ ///
119+ /// # Parameters
120+ /// - `input`: A slice of the input string for which the Z-array is to be constructed.
121+ ///
122+ /// # Returns
123+ /// A vector representing the Z-array of the input string.
42124pub fn z_array < T : Eq > ( input : & [ T ] ) -> Vec < usize > {
43125 match_with_z_array ( input, input, 1 , false )
44126}
45127
128+ /// Matches the occurrences of a given pattern in an input string.
129+ ///
130+ /// This function acts as a wrapper around `match_with_z_array` to provide a simpler
131+ /// interface for pattern matching, returning only full matches.
132+ ///
133+ /// # Parameters
134+ /// - `input`: A slice of the input string where the pattern will be searched.
135+ /// - `pattern`: A slice of the pattern to search for in the input string.
136+ ///
137+ /// # Returns
138+ /// A vector of indices where the pattern matches the input string.
46139pub fn match_pattern < T : Eq > ( input : & [ T ] , pattern : & [ T ] ) -> Vec < usize > {
47140 match_with_z_array ( input, pattern, 0 , true )
48141}
@@ -51,56 +144,67 @@ pub fn match_pattern<T: Eq>(input: &[T], pattern: &[T]) -> Vec<usize> {
mod tests {
    use super::*;

    /// Sample text shared by the longer pattern-matching cases.
    const LIPSUM: &str = concat!(
        "lorem ipsum dolor sit amet, consectetur ",
        "adipiscing elit, sed do eiusmod tempor ",
        "incididunt ut labore et dolore magna aliqua"
    );

    // Generates one `#[test]` per `name: (input, pattern, expected)` entry,
    // checking the indices returned by `match_pattern`.
    macro_rules! test_match_pattern {
        ($($name:ident: ($input:expr, $pattern:expr, $expected:expr),)*) => {
            $(
                #[test]
                fn $name() {
                    let (input, pattern, expected) = ($input, $pattern, $expected);
                    assert_eq!(match_pattern(input.as_bytes(), pattern.as_bytes()), expected);
                }
            )*
        };
    }

    // Generates one `#[test]` per `name: (input, expected)` entry,
    // checking the vector returned by `z_array`.
    macro_rules! test_z_array_cases {
        ($($name:ident: ($input:expr, $expected:expr),)*) => {
            $(
                #[test]
                fn $name() {
                    let (input, expected) = ($input, $expected);
                    assert_eq!(z_array(input.as_bytes()), expected);
                }
            )*
        };
    }

    test_match_pattern! {
        simple_match: ("abcabcabc", "abc", vec![0, 3, 6]),
        no_match: ("abcdef", "xyz", vec![]),
        single_char_match: ("aaaaaa", "a", vec![0, 1, 2, 3, 4, 5]),
        overlapping_match: ("abababa", "aba", vec![0, 2, 4]),
        full_string_match: ("pattern", "pattern", vec![0]),
        // The pattern here is a single space, not an empty string.
        single_space_pattern_not_in_text: ("nonempty", " ", vec![]),
        pattern_larger_than_text: ("small", "largerpattern", vec![]),
        repeated_pattern_in_text: ("aaaaaaaa", "aaa", vec![0, 1, 2, 3, 4, 5]),
        pattern_not_in_lipsum: (LIPSUM, ";alksdjfoiwer", vec![]),
        single_char_pattern_in_lipsum: (LIPSUM, "m", vec![4, 10, 23, 68, 74, 110]),
        // Multi-character lipsum cases carried over from the previous test suite.
        three_char_pattern_in_lipsum: (LIPSUM, "rem", vec![2]),
        two_char_pattern_in_lipsum: (LIPSUM, "em", vec![3, 73]),
    }

    test_z_array_cases! {
        basic_z_array: ("aabaabab", vec![0, 1, 0, 4, 1, 0, 1, 0]),
        empty_string: ("", vec![]),
        single_char_z_array: ("a", vec![0]),
        repeated_char_z_array: ("aaaaaa", vec![0, 5, 4, 3, 2, 1]),
    }

    // Scaled-down version of the earlier long-input test: every position that
    // leaves room for the whole pattern is a match.
    #[test]
    fn long_pattern_in_text() {
        let text = vec![b'A'; 1_000];
        let pattern = vec![b'A'; 500];
        let expected: Vec<usize> = (0..=500).collect();
        assert_eq!(match_pattern(text.as_slice(), pattern.as_slice()), expected);
    }
}
0 commit comments