@@ -93,7 +93,13 @@ pub trait Detector: Send + Sync {
9393 fn languages ( & self ) -> & ' static [ Language ] ;
9494 fn prefilter ( & self ) -> Prefilter ; // extensions & cheap substrings
9595 fn scan ( & self , unit : & ScanUnit , em : & mut Emitter ) -> Result < ( ) > ;
96- fn scan_optimized ( & self , unit : & ScanUnit , stripped_s : & str , index : & LineIndex , em : & mut Emitter ) -> Result < ( ) > {
96+ fn scan_optimized (
97+ & self ,
98+ unit : & ScanUnit ,
99+ stripped_s : & str ,
100+ index : & LineIndex ,
101+ em : & mut Emitter ,
102+ ) -> Result < ( ) > {
97103 // Default implementation falls back to the original scan method
98104 self . scan ( unit, em)
99105 }
@@ -212,37 +218,29 @@ fn default_include_globs() -> Vec<String> {
212218 "**/*.hxx" . to_string( ) ,
213219 "**/*.h++" . to_string( ) ,
214220 "**/*.hh" . to_string( ) ,
215-
216221 // Java
217222 "**/*.java" . to_string( ) ,
218-
219223 // Go
220224 "**/*.go" . to_string( ) ,
221-
222225 // Rust
223226 "**/*.rs" . to_string( ) ,
224-
225227 // Python
226228 "**/*.py" . to_string( ) ,
227229 "**/*.pyw" . to_string( ) ,
228230 "**/*.pyi" . to_string( ) ,
229-
230231 // PHP
231232 "**/*.php" . to_string( ) ,
232233 "**/*.phtml" . to_string( ) ,
233234 "**/*.php3" . to_string( ) ,
234235 "**/*.php4" . to_string( ) ,
235236 "**/*.php5" . to_string( ) ,
236237 "**/*.phps" . to_string( ) ,
237-
238238 // Swift
239239 "**/*.swift" . to_string( ) ,
240-
241240 // Objective-C
242241 "**/*.m" . to_string( ) ,
243242 "**/*.mm" . to_string( ) ,
244243 "**/*.M" . to_string( ) ,
245-
246244 // Kotlin
247245 "**/*.kt" . to_string( ) ,
248246 "**/*.kts" . to_string( ) ,
@@ -276,7 +274,7 @@ impl PatternRegistry {
276274 . into_iter ( )
277275 . map ( |lib| compile_library ( lib) )
278276 . collect :: < Result < Vec < _ > > > ( ) ?;
279-
277+
280278 // Build language cache only if we have many libraries
281279 let language_cache = if libs. len ( ) > 50 {
282280 let mut cache = HashMap :: new ( ) ;
@@ -289,8 +287,11 @@ impl PatternRegistry {
289287 } else {
290288 HashMap :: new ( ) // Empty cache for small numbers of libraries
291289 } ;
292-
293- Ok ( Self { libs, language_cache } )
290+
291+ Ok ( Self {
292+ libs,
293+ language_cache,
294+ } )
294295 }
295296
296297 pub fn for_language ( & self , language : Language ) -> Vec < & CompiledLibrary > {
@@ -368,9 +369,14 @@ mod strip {
368369
369370 pub fn strip_comments ( language : Language , input : & [ u8 ] ) -> Vec < u8 > {
370371 match language {
371- Language :: Go | Language :: Java | Language :: C | Language :: Cpp | Language :: Rust | Language :: Swift | Language :: ObjC | Language :: Kotlin => {
372- strip_c_like ( language, input)
373- }
372+ Language :: Go
373+ | Language :: Java
374+ | Language :: C
375+ | Language :: Cpp
376+ | Language :: Rust
377+ | Language :: Swift
378+ | Language :: ObjC
379+ | Language :: Kotlin => strip_c_like ( language, input) ,
374380 Language :: Python | Language :: Php => strip_hash_like ( language, input) ,
375381 }
376382 }
@@ -693,7 +699,7 @@ impl<'a> Scanner<'a> {
693699
694700 pub fn discover_files ( & self , roots : & [ PathBuf ] ) -> Vec < PathBuf > {
695701 let mut paths = Vec :: new ( ) ;
696-
702+
697703 // Build glob matcher for include patterns
698704 let include_matcher: Option < globset:: GlobSet > = if !self . config . include_globs . is_empty ( ) {
699705 let mut builder = globset:: GlobSetBuilder :: new ( ) ;
@@ -714,15 +720,15 @@ impl<'a> Scanner<'a> {
714720 } else {
715721 None
716722 } ;
717-
723+
718724 for root in roots {
719725 let mut builder = WalkBuilder :: new ( root) ;
720726 builder
721727 . hidden ( false )
722728 . git_ignore ( true )
723729 . git_exclude ( true )
724730 . ignore ( true ) ;
725-
731+
726732 for result in builder. build ( ) {
727733 if let Ok ( entry) = result {
728734 let md = match entry. metadata ( ) {
@@ -733,16 +739,16 @@ impl<'a> Scanner<'a> {
733739 if md. len ( ) as usize > self . config . max_file_size {
734740 continue ;
735741 }
736-
742+
737743 let path = entry. into_path ( ) ;
738-
744+
739745 // Apply include glob filtering
740746 if let Some ( ref matcher) = include_matcher {
741747 if !matcher. is_match ( & path) {
742748 continue ;
743749 }
744750 }
745-
751+
746752 paths. push ( path) ;
747753 }
748754 }
@@ -800,7 +806,7 @@ impl<'a> Scanner<'a> {
800806 let stripped = strip_comments ( lang, & bytes) ;
801807 let stripped_s = String :: from_utf8_lossy ( & stripped) ;
802808 let index = LineIndex :: new ( stripped_s. as_bytes ( ) ) ;
803-
809+
804810 let mut em = Emitter {
805811 tx : tx. clone ( ) ,
806812 rx : rx. clone ( ) ,
@@ -861,14 +867,14 @@ fn prefilter_hit(det: &Box<dyn Detector>, stripped: &[u8]) -> bool {
861867 if pf. substrings . is_empty ( ) {
862868 return true ;
863869 }
864-
870+
865871 // Try to use cached automaton if available (for PatternDetector)
866872 if let Some ( pattern_det) = det. as_any ( ) . downcast_ref :: < PatternDetector > ( ) {
867873 if let Ok ( Some ( ac) ) = pattern_det. get_cached_automaton ( & pf. substrings ) {
868874 return ac. is_match ( stripped) ;
869875 }
870876 }
871-
877+
872878 // Fallback: build automaton (for other detector types)
873879 let ac = AhoCorasickBuilder :: new ( )
874880 . ascii_case_insensitive ( true )
@@ -906,11 +912,14 @@ impl PatternDetector {
906912}
907913
908914impl PatternDetector {
909- fn get_cached_automaton ( & self , substrings : & BTreeSet < String > ) -> Result < Option < aho_corasick:: AhoCorasick > > {
915+ fn get_cached_automaton (
916+ & self ,
917+ substrings : & BTreeSet < String > ,
918+ ) -> Result < Option < aho_corasick:: AhoCorasick > > {
910919 if substrings. is_empty ( ) {
911920 return Ok ( None ) ;
912921 }
913-
922+
914923 let mut cached = self . cached_automaton . lock ( ) . unwrap ( ) ;
915924 if cached. is_none ( ) {
916925 let substrings_vec: Vec < & str > = substrings. iter ( ) . map ( |s| s. as_str ( ) ) . collect ( ) ;
@@ -923,7 +932,14 @@ impl PatternDetector {
923932 Ok ( cached. clone ( ) )
924933 }
925934
926- fn scan_with_preprocessed ( & self , libs : Vec < & CompiledLibrary > , stripped_s : & str , index : & LineIndex , unit : & ScanUnit , em : & mut Emitter ) -> Result < ( ) > {
935+ fn scan_with_preprocessed (
936+ & self ,
937+ libs : Vec < & CompiledLibrary > ,
938+ stripped_s : & str ,
939+ index : & LineIndex ,
940+ unit : & ScanUnit ,
941+ em : & mut Emitter ,
942+ ) -> Result < ( ) > {
927943 for lib in libs {
928944 // import/include/namespace first
929945 let mut best_conf = 0.0f32 ;
@@ -960,7 +976,8 @@ impl PatternDetector {
960976 }
961977 }
962978 }
963- let should_report = ( matched_import && api_hits > 0 ) || ( lib. import . is_empty ( ) && api_hits > 0 ) ;
979+ let should_report =
980+ ( matched_import && api_hits > 0 ) || ( lib. import . is_empty ( ) && api_hits > 0 ) ;
964981 if should_report {
965982 let finding = Finding {
966983 language : unit. lang ,
@@ -991,7 +1008,7 @@ impl Detector for PatternDetector {
9911008 if let Some ( ref cached) = self . cached_prefilter {
9921009 return cached. clone ( ) ;
9931010 }
994-
1011+
9951012 let mut substrings = BTreeSet :: new ( ) ;
9961013 for lib in self . registry . for_language ( self . languages [ 0 ] ) {
9971014 for s in & lib. prefilter_substrings {
@@ -1002,7 +1019,7 @@ impl Detector for PatternDetector {
10021019 extensions : BTreeSet :: new ( ) ,
10031020 substrings,
10041021 } ;
1005-
1022+
10061023 // Note: We can't actually cache here due to &self, but this is still faster
10071024 // than recomputing every time since we're using the cached language lookup
10081025 pf
@@ -1018,7 +1035,13 @@ impl Detector for PatternDetector {
10181035 self . scan_with_preprocessed ( libs, & stripped_s, & index, unit, em)
10191036 }
10201037
1021- fn scan_optimized ( & self , unit : & ScanUnit , stripped_s : & str , index : & LineIndex , em : & mut Emitter ) -> Result < ( ) > {
1038+ fn scan_optimized (
1039+ & self ,
1040+ unit : & ScanUnit ,
1041+ stripped_s : & str ,
1042+ index : & LineIndex ,
1043+ em : & mut Emitter ,
1044+ ) -> Result < ( ) > {
10221045 let libs = self . registry . for_language ( unit. lang ) ;
10231046 if libs. is_empty ( ) {
10241047 return Ok ( ( ) ) ;
0 commit comments