@@ -4,8 +4,79 @@ use crate::pattern::{build_pattern, is_boundary, Match};
44use crate :: scanner:: { CoercionMode , MatchHunk , VariantMap } ;
55use bstr:: ByteSlice ;
66use regex:: bytes:: Regex ;
7+ use std:: collections:: BTreeSet ;
78use std:: path:: { Path , PathBuf } ;
89
10+ /// Precompiled identifier extractor reused across files to avoid recompiling
11+ /// regex patterns on every scan iteration.
12+ pub struct IdentifierExtractor {
13+ regex : Regex ,
14+ split_on_dots : bool ,
15+ }
16+
17+ impl IdentifierExtractor {
18+ /// Construct an extractor tuned for the provided style set.
19+ pub fn new ( styles : & [ Style ] ) -> Self {
20+ let title_pattern = "[A-Z][a-z]+(?:\\ s+[A-Z][a-z]+)*" ;
21+ let identifier_pattern = "[a-zA-Z_][a-zA-Z0-9_\\ -\\ .]*" ;
22+ let pattern = if styles. contains ( & Style :: Title ) {
23+ format ! ( r"\b(?:{}|{})\b" , title_pattern, identifier_pattern)
24+ } else {
25+ format ! ( r"\b{}\b" , identifier_pattern)
26+ } ;
27+
28+ let regex = Regex :: new ( & pattern) . expect ( "identifier regex must compile" ) ;
29+ let split_on_dots = !styles. contains ( & Style :: Dot ) ;
30+
31+ Self {
32+ regex,
33+ split_on_dots,
34+ }
35+ }
36+
37+ /// Find all potential identifiers in the content using the precompiled pattern.
38+ pub fn find_all ( & self , content : & [ u8 ] ) -> Vec < ( usize , usize , String ) > {
39+ let mut identifiers = Vec :: new ( ) ;
40+
41+ for m in self . regex . find_iter ( content) {
42+ let identifier = String :: from_utf8_lossy ( m. as_bytes ( ) ) . to_string ( ) ;
43+
44+ if std:: env:: var ( "RENAMIFY_DEBUG_IDENTIFIERS" ) . is_ok ( ) {
45+ println ! (
46+ "Found identifier: '{}' at {}-{}" ,
47+ identifier,
48+ m. start( ) ,
49+ m. end( )
50+ ) ;
51+ }
52+
53+ if identifier. contains ( '.' ) && self . split_on_dots {
54+ let parts: Vec < & str > = identifier. split ( '.' ) . collect ( ) ;
55+ let mut current_pos = m. start ( ) ;
56+
57+ for ( i, part) in parts. iter ( ) . enumerate ( ) {
58+ if !part. is_empty ( ) {
59+ identifiers. push ( (
60+ current_pos,
61+ current_pos + part. len ( ) ,
62+ ( * part) . to_string ( ) ,
63+ ) ) ;
64+ }
65+ current_pos += part. len ( ) + 1 ; // Account for the dot separator
66+
67+ if i < parts. len ( ) - 1 && current_pos <= m. end ( ) {
68+ // Dot already handled by position increment above.
69+ }
70+ }
71+ } else {
72+ identifiers. push ( ( m. start ( ) , m. end ( ) , identifier) ) ;
73+ }
74+ }
75+
76+ identifiers
77+ }
78+ }
79+
980/// Normalize a path by removing Windows long path prefix if present
1081fn normalize_path ( path : & Path ) -> PathBuf {
1182 #[ cfg( windows) ]
@@ -39,66 +110,6 @@ fn byte_offset_to_char_offset(text: &str, byte_offset: usize) -> usize {
39110 char_offset
40111}
41112
42- /// Find all potential identifiers in the content using a broad regex pattern
43- fn find_all_identifiers ( content : & [ u8 ] , styles : & [ Style ] ) -> Vec < ( usize , usize , String ) > {
44- let mut identifiers = Vec :: new ( ) ;
45-
46- // Pattern to match identifier-like strings, including dots in some contexts
47- // This is tricky: we want to split on dots for things like obj.prop but keep
48- // dots for mixed-style identifiers like config.max_value
49- // For Title style, we need to include spaces to capture "Title Case" patterns
50- let pattern = if styles. len ( ) == 1 && styles[ 0 ] == Style :: Title {
51- // Special pattern for Title style that includes spaces
52- r"\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*\b"
53- } else {
54- r"\b[a-zA-Z_][a-zA-Z0-9_\-\.]*\b"
55- } ;
56- let regex = Regex :: new ( pattern) . unwrap ( ) ;
57-
58- for m in regex. find_iter ( content) {
59- let identifier = String :: from_utf8_lossy ( m. as_bytes ( ) ) . to_string ( ) ;
60-
61- // Debug: print what identifiers are being found
62- if std:: env:: var ( "RENAMIFY_DEBUG_IDENTIFIERS" ) . is_ok ( ) {
63- println ! (
64- "Found identifier: '{}' at {}-{}" ,
65- identifier,
66- m. start( ) ,
67- m. end( )
68- ) ;
69- }
70-
71- // Only split on dots if dot style is NOT in the selected styles
72- // When dot style is selected, keep dot-separated identifiers intact
73- let should_split_on_dots = !styles. contains ( & Style :: Dot ) ;
74-
75- if identifier. contains ( '.' ) && should_split_on_dots {
76- // Split on dots for things like obj.method or this.property
77- // But NOT when we're specifically looking for dot.case style
78- let parts: Vec < & str > = identifier. split ( '.' ) . collect ( ) ;
79- let mut current_pos = m. start ( ) ;
80-
81- for ( i, part) in parts. iter ( ) . enumerate ( ) {
82- if !part. is_empty ( ) {
83- identifiers. push ( ( current_pos, current_pos + part. len ( ) , ( * part) . to_string ( ) ) ) ;
84- }
85- current_pos += part. len ( ) + 1 ; // +1 for the dot
86-
87- // If there are more parts, we've consumed a dot
88- if i < parts. len ( ) - 1 && current_pos <= m. end ( ) {
89- // The dot is at current_pos - 1, move past it
90- // current_pos is already at the right position for the next part
91- }
92- }
93- } else {
94- // Keep as single identifier (including dots)
95- identifiers. push ( ( m. start ( ) , m. end ( ) , identifier) ) ;
96- }
97- }
98-
99- identifiers
100- }
101-
102113/// Enhanced matching that finds both exact and compound matches
103114pub fn find_enhanced_matches (
104115 content : & [ u8 ] ,
@@ -107,6 +118,8 @@ pub fn find_enhanced_matches(
107118 replace : & str ,
108119 variant_map : & VariantMap ,
109120 styles : & [ Style ] ,
121+ identifier_extractor : & IdentifierExtractor ,
122+ additional_lines : Option < & BTreeSet < usize > > ,
110123) -> Vec < Match > {
111124 let mut all_matches = Vec :: new ( ) ;
112125 let mut processed_ranges = Vec :: new ( ) ; // Track (start, end) ranges that were exactly matched
@@ -163,7 +176,60 @@ pub fn find_enhanced_matches(
163176
164177 // Third, find all identifiers and check for compound matches
165178 {
166- let identifiers = find_all_identifiers ( content, styles) ;
179+ let identifiers = if processed_ranges. is_empty ( ) {
180+ identifier_extractor. find_all ( content)
181+ } else {
182+ let mut candidate_lines = BTreeSet :: new ( ) ;
183+ for m in & all_matches {
184+ candidate_lines. insert ( m. line ) ;
185+ if m. line > 1 {
186+ candidate_lines. insert ( m. line - 1 ) ;
187+ }
188+ candidate_lines. insert ( m. line + 1 ) ;
189+ }
190+
191+ if let Some ( extra_lines) = additional_lines {
192+ candidate_lines. extend ( extra_lines. iter ( ) . copied ( ) ) ;
193+ }
194+
195+ if candidate_lines. is_empty ( ) {
196+ identifier_extractor. find_all ( content)
197+ } else {
198+ let mut line_offsets = Vec :: new ( ) ;
199+ let mut pos = 0 ;
200+ for line in content. lines_with_terminator ( ) {
201+ line_offsets. push ( pos) ;
202+ pos += line. len ( ) ;
203+ }
204+
205+ let mut scoped_identifiers = Vec :: new ( ) ;
206+ for line_idx in candidate_lines {
207+ let idx = line_idx. saturating_sub ( 1 ) ;
208+ if idx >= line_offsets. len ( ) {
209+ continue ;
210+ }
211+
212+ let start = line_offsets[ idx] ;
213+ let end = if idx + 1 < line_offsets. len ( ) {
214+ line_offsets[ idx + 1 ]
215+ } else {
216+ content. len ( )
217+ } ;
218+ let slice = & content[ start..end] ;
219+
220+ for ( local_start, local_end, identifier) in identifier_extractor. find_all ( slice)
221+ {
222+ scoped_identifiers. push ( (
223+ start + local_start,
224+ start + local_end,
225+ identifier,
226+ ) ) ;
227+ }
228+ }
229+
230+ scoped_identifiers
231+ }
232+ } ;
167233
168234 for ( start, end, identifier) in identifiers {
169235 // Skip if this identifier was already matched exactly or if it's completely contained within a processed range
@@ -381,7 +447,8 @@ mod tests {
381447 let content = b"let preview_format_arg = PreviewFormatArg::new();" ;
382448 // Use default styles for test
383449 let styles = vec ! [ Style :: Snake , Style :: Pascal ] ;
384- let identifiers = find_all_identifiers ( content, & styles) ;
450+ let extractor = IdentifierExtractor :: new ( & styles) ;
451+ let identifiers = extractor. find_all ( content) ;
385452
386453 // Should find: let, preview_format_arg, PreviewFormatArg, new
387454 assert ! ( identifiers. len( ) >= 4 ) ;
@@ -397,7 +464,8 @@ mod tests {
397464
398465 // When looking for dot style only, keep dot-separated identifiers intact
399466 let dot_styles = vec ! [ Style :: Dot ] ;
400- let identifiers = find_all_identifiers ( content, & dot_styles) ;
467+ let extractor = IdentifierExtractor :: new ( & dot_styles) ;
468+ let identifiers = extractor. find_all ( content) ;
401469 let names: Vec < String > = identifiers. iter ( ) . map ( |( _, _, id) | id. clone ( ) ) . collect ( ) ;
402470 assert ! ( names. contains( & "test.case" . to_string( ) ) ) ;
403471 assert ! ( names. contains( & "use.case" . to_string( ) ) ) ;
@@ -406,7 +474,8 @@ mod tests {
406474
407475 // When using other styles, split on dots
408476 let other_styles = vec ! [ Style :: Snake , Style :: Camel ] ;
409- let identifiers = find_all_identifiers ( content, & other_styles) ;
477+ let extractor = IdentifierExtractor :: new ( & other_styles) ;
478+ let identifiers = extractor. find_all ( content) ;
410479 let names: Vec < String > = identifiers. iter ( ) . map ( |( _, _, id) | id. clone ( ) ) . collect ( ) ;
411480 // Should split into individual parts
412481 assert ! ( names. contains( & "test" . to_string( ) ) ) ;
@@ -439,9 +508,18 @@ mod tests {
439508 ) ;
440509
441510 let styles = vec ! [ Style :: Snake , Style :: Pascal ] ;
511+ let extractor = IdentifierExtractor :: new ( & styles) ;
442512
443- let matches =
444- find_enhanced_matches ( content, "test.rs" , search, replace, & variant_map, & styles) ;
513+ let matches = find_enhanced_matches (
514+ content,
515+ "test.rs" ,
516+ search,
517+ replace,
518+ & variant_map,
519+ & styles,
520+ & extractor,
521+ None ,
522+ ) ;
445523
446524 // Should find both preview_format_arg and PreviewFormatArg
447525 assert_eq ! ( matches. len( ) , 2 ) ;
0 commit comments