Skip to content

Commit 2acede0

Browse files
cursoragent and script3r committed
Refactor scanner to process files in batches
Co-authored-by: script3r <[email protected]>
1 parent f56935d commit 2acede0

File tree

1 file changed

+22
-24
lines changed

1 file changed

+22
-24
lines changed

crates/scanner-core/src/lib.rs

Lines changed: 22 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -850,7 +850,7 @@ impl<'a> Scanner<'a> {
850850

851851
// Update discovered files counter atomically (no lock!)
852852
files_discovered.fetch_add(1, Ordering::Relaxed);
853-
853+
854854
// Send discovery progress update (1 = discovery signal)
855855
if let Some(ref progress_tx) = progress_sender {
856856
let _ = progress_tx.send(1);
@@ -872,19 +872,19 @@ impl<'a> Scanner<'a> {
872872
progress_sender: Option<Sender<usize>>,
873873
) -> Result<()> {
874874
const BATCH_SIZE: usize = 1000; // Process files in batches for better cache locality
875-
875+
876876
let mut batch = Vec::with_capacity(BATCH_SIZE);
877877
let mut _processed_count = 0usize;
878-
878+
879879
// Collect files into batches and process them
880880
for path in work_receiver.iter() {
881881
batch.push(path);
882-
882+
883883
if batch.len() >= BATCH_SIZE {
884884
let (processed, findings) = self.process_batch(&batch, &findings_sender)?;
885885
_processed_count += processed;
886886
batch.clear();
887-
887+
888888
// Send processing progress update (2 = processing signal, repeated for batch size)
889889
if let Some(ref progress_tx) = progress_sender {
890890
for _ in 0..processed {
@@ -897,12 +897,12 @@ impl<'a> Scanner<'a> {
897897
}
898898
}
899899
}
900-
900+
901901
// Process remaining files in the final batch
902902
if !batch.is_empty() {
903903
let (processed, findings) = self.process_batch(&batch, &findings_sender)?;
904904
_processed_count += processed;
905-
905+
906906
if let Some(ref progress_tx) = progress_sender {
907907
for _ in 0..processed {
908908
let _ = progress_tx.send(2);
@@ -916,27 +916,25 @@ impl<'a> Scanner<'a> {
916916

917917
Ok(())
918918
}
919-
919+
920920
/// Process a batch of files in parallel for better performance
921921
fn process_batch(
922922
&self,
923-
batch: &[PathBuf],
924-
findings_sender: &Sender<Finding>
923+
batch: &[PathBuf],
924+
findings_sender: &Sender<Finding>,
925925
) -> Result<(usize, usize)> {
926926
// Process the batch in parallel using rayon
927927
let results: Vec<usize> = batch
928928
.par_iter()
929-
.map(|path| {
930-
match self.scan_file(path, findings_sender) {
931-
Ok(findings_count) => findings_count,
932-
Err(e) => {
933-
eprintln!("Error scanning file {:?}: {}", path, e);
934-
0
935-
}
929+
.map(|path| match self.scan_file(path, findings_sender) {
930+
Ok(findings_count) => findings_count,
931+
Err(e) => {
932+
eprintln!("Error scanning file {:?}: {}", path, e);
933+
0
936934
}
937935
})
938936
.collect();
939-
937+
940938
let total_findings = results.iter().sum();
941939
Ok((batch.len(), total_findings))
942940
}
@@ -1059,36 +1057,36 @@ impl<'a> Scanner<'a> {
10591057
/// Ultra-fast language detection that avoids string allocations
10601058
pub fn detect_language(path: &Path) -> Option<Language> {
10611059
let ext = path.extension()?;
1062-
1060+
10631061
// Fast path: check common extensions without string conversion
10641062
match ext.as_encoded_bytes() {
10651063
// Single char extensions
10661064
b"c" => Some(Language::C),
10671065
b"h" => Some(Language::C),
10681066
b"m" | b"M" => Some(Language::ObjC),
1069-
1070-
// Two char extensions
1067+
1068+
// Two char extensions
10711069
b"go" => Some(Language::Go),
10721070
b"rs" => Some(Language::Rust),
10731071
b"py" => Some(Language::Python),
10741072
b"kt" => Some(Language::Kotlin),
10751073
b"cc" => Some(Language::Cpp),
10761074
b"mm" => Some(Language::ObjC),
1077-
1075+
10781076
// Three char extensions
10791077
b"cpp" | b"cxx" | b"hpp" | b"hxx" => Some(Language::Cpp),
10801078
b"php" => Some(Language::Php),
10811079
b"pyw" | b"pyi" => Some(Language::Python),
10821080
b"kts" => Some(Language::Kotlin),
10831081
b"erl" | b"hrl" => Some(Language::Erlang),
1084-
1082+
10851083
// Four+ char extensions
10861084
b"java" => Some(Language::Java),
10871085
b"swift" => Some(Language::Swift),
10881086
b"phtml" => Some(Language::Php),
10891087
b"php3" | b"php4" | b"php5" | b"phps" => Some(Language::Php),
10901088
b"beam" => Some(Language::Erlang),
1091-
1089+
10921090
// Fallback to string comparison for edge cases
10931091
_ => {
10941092
let ext_str = ext.to_str()?.to_ascii_lowercase();

0 commit comments

Comments (0)