@@ -119,19 +119,19 @@ fn build_matcher(raw: bool, text: &str, language: SourceLanguage) -> Option<Mess
119119 let mut quality = 0 ;
120120 for cap in language. get_placeholder_regex ( ) . captures_iter ( text) {
121121 let placeholder = cap. get ( 0 ) . unwrap ( ) ;
122- let text = escape_ignore_newlines ( raw, & text[ last_end..placeholder. start ( ) ] ) ;
123- quality += text . chars ( ) . filter ( |c| !c. is_whitespace ( ) ) . count ( ) ;
124- pattern. push_str ( text . as_str ( ) ) ;
122+ let subtext = escape_ignore_newlines ( raw, & text[ last_end..placeholder. start ( ) ] ) ;
123+ quality += subtext . chars ( ) . filter ( |c| !c. is_whitespace ( ) ) . count ( ) ;
124+ pattern. push_str ( subtext . as_str ( ) ) ;
125125 last_end = placeholder. end ( ) ;
126126 pattern. push_str ( "(.+)" ) ;
127127 args. push ( language. captures_to_format_arg ( & cap) ) ;
128128 }
129- let text = escape_ignore_newlines ( raw, & text[ last_end..] ) ;
130- quality += text . chars ( ) . filter ( |c| !c. is_whitespace ( ) ) . count ( ) ;
129+ let subtext = escape_ignore_newlines ( raw, & text[ last_end..] ) ;
130+ quality += subtext . chars ( ) . filter ( |c| !c. is_whitespace ( ) ) . count ( ) ;
131131 if quality == 0 {
132132 None
133133 } else {
134- pattern. push_str ( text . as_str ( ) ) ;
134+ pattern. push_str ( subtext . as_str ( ) ) ;
135135 pattern. push ( '$' ) ;
136136 Some ( MessageMatcher {
137137 matcher : Regex :: new ( pattern. as_str ( ) ) . unwrap ( ) ,
@@ -142,14 +142,27 @@ fn build_matcher(raw: bool, text: &str, language: SourceLanguage) -> Option<Mess
142142 }
143143}
144144
145+ /// Regex for finding values that need to be escaped in a string-literal. The components are
146+ /// as follows:
147+ ///
148+ /// * `[.*+?^${}()|\[\]]` - Characters that are used in regexes and need to be escaped.
149+ /// * `[\n\r\t]` - White space characters that we should turn into regex escape sequences.
150+ /// * `\\[0-7]{3}|\\0` - Regex does not support octal escape-sequences, so we need to turn
151+ /// them into a hex escape.
152+ /// * `\\N\{[^}]+}` - Python named-Unicode escape that is turned into a `\w` since it would be
153+ /// challenging to get the names all right.
145154static ESCAPE_REGEX : LazyLock < Regex > = LazyLock :: new ( || {
146155 Regex :: new ( r#"([.*+?^${}()|\[\]])|([\n\r\t])|(\\[0-7]{3}|\\0)|(\\N\{[^}]+})"# ) . unwrap ( )
147156} ) ;
148157
149- // A regex for raw strings that doesn't try to interpret escape sequences.
150- static RAW_ESCAPE_REGEX : LazyLock < Regex > = LazyLock :: new ( || {
151- Regex :: new ( r#"([.*+?^${}()|\[\]])|([\n\r\t])|(\\)"# ) . unwrap ( )
152- } ) ;
158+ /// Regex for finding values that need to be escaped in a raw string-literal. The components are
159+ /// as follows:
160+ ///
161+ /// * `[.*+?^${}()|\[\]]` - Characters that are used in regexes and need to be escaped.
162+ /// * `[\n\r\t]` - White space characters that we should turn into regex escape sequences.
163+ /// * `\\` - A backslash
164+ static RAW_ESCAPE_REGEX : LazyLock < Regex > =
165+ LazyLock :: new ( || Regex :: new ( r#"([.*+?^${}()|\[\]])|([\n\r\t])|(\\)"# ) . unwrap ( ) ) ;
153166
154167/// Escape special chars except newlines and carriage returns in order to support multiline strings
155168fn escape_ignore_newlines ( raw : bool , segment : & str ) -> String {
@@ -181,7 +194,7 @@ fn escape_ignore_newlines(raw: bool, segment: &str) -> String {
181194 } else if let Some ( c) = cap. get ( 3 ) {
182195 if raw {
183196 result. push ( '\\' ) ;
184- result. push_str ( c. as_str ( ) ) ;
197+ result. push_str ( c. as_str ( ) ) ;
185198 } else {
186199 let c = c. as_str ( ) ;
187200 let c = & c[ 1 ..] ;
@@ -303,4 +316,20 @@ mod tests {
303316 matcher. as_str( )
304317 ) ;
305318 }
319+
320+ #[ test]
321+ fn test_build_matcher_raw ( ) {
322+ let MessageMatcher { matcher, .. } = build_matcher (
323+ true ,
324+ "Hard-coded \\ Windows\\ Path" ,
325+ SourceLanguage :: Rust ,
326+ )
327+ . unwrap ( ) ;
328+ assert_eq ! (
329+ Regex :: new( r#"(?s)^Hard-coded \\Windows\\Path$"# )
330+ . unwrap( )
331+ . as_str( ) ,
332+ matcher. as_str( )
333+ ) ;
334+ }
306335}
0 commit comments