@@ -11,6 +11,7 @@ use std::io::Read;
1111use std:: path:: { Path , PathBuf } ;
1212use std:: sync:: Arc ;
1313use std:: sync:: Mutex ;
14+ use std:: process:: Command ;
1415
1516// ---------------- Types ----------------
1617
@@ -722,58 +723,144 @@ impl<'a> Scanner<'a> {
722723 }
723724
724725 pub fn discover_files ( & self , roots : & [ PathBuf ] ) -> Vec < PathBuf > {
725- let mut paths = Vec :: new ( ) ;
726+ let mut discovered_paths = Vec :: new ( ) ;
726727
727- // Build glob matcher for include patterns
728+ // Compile include and exclude glob sets once
728729 let include_matcher: Option < globset:: GlobSet > = if !self . config . include_globs . is_empty ( ) {
729730 let mut builder = globset:: GlobSetBuilder :: new ( ) ;
730731 for pattern in & self . config . include_globs {
731- match globset:: Glob :: new ( pattern) {
732- Ok ( glob) => {
733- builder. add ( glob) ;
734- }
735- Err ( _) => {
736- return Vec :: new ( ) ; // Return empty on pattern error
737- }
732+ if let Ok ( glob) = globset:: Glob :: new ( pattern) {
733+ builder. add ( glob) ;
734+ } else {
735+ // If any pattern is invalid, return empty to avoid expensive scan with bad filter
736+ return Vec :: new ( ) ;
738737 }
739738 }
740739 builder. build ( ) . ok ( )
741740 } else {
742741 None
743742 } ;
744743
744+ let exclude_matcher: Option < globset:: GlobSet > = if !self . config . exclude_globs . is_empty ( ) {
745+ let mut builder = globset:: GlobSetBuilder :: new ( ) ;
746+ for pattern in & self . config . exclude_globs {
747+ if let Ok ( glob) = globset:: Glob :: new ( pattern) {
748+ builder. add ( glob) ;
749+ } else {
750+ return Vec :: new ( ) ;
751+ }
752+ }
753+ builder. build ( ) . ok ( )
754+ } else {
755+ None
756+ } ;
757+
758+ // Helper to apply path-based filters early (before metadata calls when possible)
759+ let path_allowed = |p : & Path | -> bool {
760+ if let Some ( ref ex) = exclude_matcher {
761+ if ex. is_match ( p) {
762+ return false ;
763+ }
764+ }
765+ if let Some ( ref inc) = include_matcher {
766+ if !inc. is_match ( p) {
767+ return false ;
768+ }
769+ }
770+ true
771+ } ;
772+
745773 for root in roots {
774+ // Fast path: leverage git index if available
775+ if root. join ( ".git" ) . exists ( ) {
776+ if let Some ( list) = git_list_files_fast ( root) {
777+ for path in list {
778+ if !path_allowed ( & path) {
779+ continue ;
780+ }
781+ // Only then stat for size
782+ if let Ok ( md) = fs:: metadata ( & path) {
783+ if md. is_file ( ) && ( md. len ( ) as usize ) <= self . config . max_file_size {
784+ discovered_paths. push ( path) ;
785+ }
786+ }
787+ }
788+ // Move on to next root after using the git fast path
789+ continue ;
790+ }
791+ }
792+
793+ // Fallback: parallel directory walk with ignore rules
746794 let mut builder = WalkBuilder :: new ( root) ;
747795 builder
748- . hidden ( false )
796+ . hidden ( false ) // preserve previous behavior: include hidden files/dirs
749797 . git_ignore ( true )
750798 . git_exclude ( true )
751- . ignore ( true ) ;
799+ . ignore ( true )
800+ . parents ( true )
801+ . follow_links ( false )
802+ . same_file_system ( false ) ;
752803
753- for entry in builder. build ( ) . flatten ( ) {
754- let md = match entry. metadata ( ) {
755- Ok ( m) => m,
756- Err ( _) => continue ,
757- } ;
758- if md. is_file ( ) {
759- if md. len ( ) as usize > self . config . max_file_size {
760- continue ;
804+ if let Ok ( n) = std:: thread:: available_parallelism ( ) {
805+ builder. threads ( n. get ( ) ) ;
806+ }
807+
808+ let out: Arc < Mutex < Vec < PathBuf > > > = Arc :: new ( Mutex :: new ( Vec :: with_capacity ( 4096 ) ) ) ;
809+ let out_ref = out. clone ( ) ;
810+
811+ builder. build_parallel ( ) . run ( || {
812+ let out = out_ref. clone ( ) ;
813+ Box :: new ( move |res| {
814+ let entry = match res {
815+ Ok ( e) => e,
816+ Err ( _) => return ignore:: WalkState :: Continue ,
817+ } ;
818+
819+ // Quickly skip non-files using cheap file_type when available
820+ if let Some ( ft) = entry. file_type ( ) {
821+ if !ft. is_file ( ) {
822+ return ignore:: WalkState :: Continue ;
823+ }
824+ } else {
825+ // Fallback to metadata if file_type unavailable
826+ if let Ok ( md) = entry. metadata ( ) {
827+ if !md. is_file ( ) {
828+ return ignore:: WalkState :: Continue ;
829+ }
830+ } else {
831+ return ignore:: WalkState :: Continue ;
832+ }
761833 }
762834
763835 let path = entry. into_path ( ) ;
764836
765- // Apply include glob filtering
766- if let Some ( ref matcher) = include_matcher {
767- if !matcher. is_match ( & path) {
768- continue ;
837+ // Apply path-based filters first to avoid unnecessary metadata calls
838+ if !path_allowed ( & path) {
839+ return ignore:: WalkState :: Continue ;
840+ }
841+
842+ // Size filter
843+ if let Ok ( md) = fs:: metadata ( & path) {
844+ if ( md. len ( ) as usize ) > self . config . max_file_size {
845+ return ignore:: WalkState :: Continue ;
769846 }
847+ } else {
848+ return ignore:: WalkState :: Continue ;
770849 }
771850
772- paths. push ( path) ;
773- }
851+ if let Ok ( mut guard) = out. lock ( ) {
852+ guard. push ( path) ;
853+ }
854+
855+ ignore:: WalkState :: Continue
856+ } )
857+ } ) ;
858+
859+ if let Ok ( mut guard) = out. lock ( ) {
860+ discovered_paths. append ( & mut * guard) ;
774861 }
775862 }
776- paths
863+ discovered_paths
777864 }
778865
779866 pub fn detect_language ( path : & Path ) -> Option < Language > {
@@ -913,6 +1000,38 @@ impl<'a> Scanner<'a> {
9131000 }
9141001}
9151002
/// Lists the files git knows about under `root`, as paths joined onto `root`.
///
/// Runs `git -C <root> ls-files -z --cached --others --exclude-standard`, which
/// reports tracked files plus untracked files that are not ignored.
///
/// Returns `None` when git cannot be spawned or exits unsuccessfully, and
/// `Some(vec![])` when git reports no files. Listed paths are not checked for
/// existence; callers are expected to stat them.
fn git_list_files_fast(root: &Path) -> Option<Vec<PathBuf>> {
    // Git emits raw bytes. On Unix every byte sequence is a valid path, so no
    // entry is dropped for not being UTF-8.
    #[cfg(unix)]
    fn path_from_git_bytes(raw: &[u8]) -> Option<PathBuf> {
        Some(PathBuf::from(
            <std::ffi::OsStr as std::os::unix::ffi::OsStrExt>::from_bytes(raw),
        ))
    }

    // Elsewhere paths must be built from text; entries that are not valid
    // UTF-8 are skipped.
    #[cfg(not(unix))]
    fn path_from_git_bytes(raw: &[u8]) -> Option<PathBuf> {
        std::str::from_utf8(raw).ok().map(PathBuf::from)
    }

    let output = Command::new("git")
        .arg("-C")
        .arg(root)
        .args(["ls-files", "-z", "--cached", "--others", "--exclude-standard"])
        .output()
        .ok()?;
    if !output.status.success() {
        return None;
    }

    // `-z` terminates every entry with NUL, so splitting leaves one empty
    // trailing piece (and a single empty piece for empty output).
    let mut list: Vec<PathBuf> = output
        .stdout
        .split(|byte| *byte == 0)
        .filter(|raw| !raw.is_empty())
        .filter_map(path_from_git_bytes)
        .map(|relative| root.join(relative))
        .collect();

    // Unmerged index entries can be reported once per stage; such repeats are
    // adjacent, so a plain dedup removes them.
    list.dedup();

    Some(list)
}
1034+
9161035fn prefilter_hit ( det : & dyn Detector , stripped : & [ u8 ] ) -> bool {
9171036 let pf = det. prefilter ( ) ;
9181037 if pf. substrings . is_empty ( ) {
0 commit comments