Skip to content

Commit 6a72bb9

Browse files
committed
feat: parallel file analysis with --parallel flag
Add concurrent file analysis support using asyncio.Semaphore. The --parallel N option controls how many files are analyzed simultaneously (default 1 preserves sequential behavior). Configurable via CLI flag, config file, or programmatic API. Includes tests for parallel scan paths.
1 parent: d89ebb9 · commit: 6a72bb9

File tree

5 files changed: 115 additions (+115) and 17 deletions (-17)

5 files changed: 115 additions (+115) and 17 deletions (-17)

src/ai_sec_scan/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22

33
from ai_sec_scan.models import Finding, ScanResult, Severity
44
from ai_sec_scan.providers.base import BaseProvider
5-
from ai_sec_scan.scanner import collect_files, run_scan_sync
5+
from ai_sec_scan.scanner import collect_files, run_scan_sync, scan
66

77
__version__ = "0.2.0"
88

@@ -14,4 +14,5 @@
1414
"__version__",
1515
"collect_files",
1616
"run_scan_sync",
17+
"scan",
1718
]

src/ai_sec_scan/cli.py

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -47,6 +47,7 @@
4747
"quiet": "quiet",
4848
"cache_dir": "cache_dir",
4949
"no_cache": "no_cache",
50+
"parallel": "parallel",
5051
}
5152

5253

@@ -241,6 +242,13 @@ def version() -> None:
241242
default=False,
242243
help="Disable result caching.",
243244
)
245+
@click.option(
246+
"--parallel",
247+
default=1,
248+
type=int,
249+
show_default=True,
250+
help="Number of files to analyze concurrently.",
251+
)
244252
def scan(
245253
path: str,
246254
provider: str,
@@ -256,6 +264,7 @@ def scan(
256264
quiet: bool,
257265
cache_dir: str | None,
258266
no_cache: bool,
267+
parallel: int,
259268
) -> None:
260269
"""Scan a file or directory for security vulnerabilities."""
261270
from ai_sec_scan.scanner import collect_files, run_scan_sync
@@ -303,6 +312,7 @@ def scan(
303312
quiet=quiet,
304313
cache_dir=Path(cache_dir) if cache_dir else None,
305314
no_cache=no_cache,
315+
parallel=parallel,
306316
)
307317

308318
# Render output

src/ai_sec_scan/scanner.py

Lines changed: 57 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -144,6 +144,7 @@ async def scan(
144144
quiet: bool = False,
145145
cache_dir: Path | None = None,
146146
no_cache: bool = False,
147+
parallel: int = 1,
147148
) -> ScanResult:
148149
"""Run a security scan on a file or directory.
149150
@@ -157,6 +158,9 @@ async def scan(
157158
quiet: Suppress progress output.
158159
cache_dir: Directory for the result cache. ``None`` uses the default.
159160
no_cache: Disable caching entirely when ``True``.
161+
parallel: Maximum number of files to analyze concurrently. Defaults
162+
to ``1`` (sequential). Values above 1 enable parallel analysis
163+
using an ``asyncio.Semaphore``.
160164
161165
Returns:
162166
ScanResult with all findings.
@@ -175,31 +179,67 @@ async def scan(
175179
)
176180

177181
cache = None if no_cache else ResultCache(cache_dir=cache_dir)
182+
concurrency = max(1, parallel)
178183

179184
all_findings: list[Finding] = []
180185
start_time = time.monotonic()
181186

182-
if quiet:
183-
for fp in files:
184-
rel_path = str(fp.relative_to(path)) if path.is_dir() else fp.name
185-
findings = await _analyze_file(fp, rel_path, provider, cache)
186-
all_findings.extend(findings)
187-
else:
188-
with Progress(
189-
SpinnerColumn(),
190-
TextColumn("[progress.description]{task.description}"),
191-
console=console,
192-
) as progress:
193-
task = progress.add_task("Scanning...", total=len(files))
194-
187+
if concurrency == 1:
188+
# Sequential path (original behaviour)
189+
if quiet:
195190
for fp in files:
196191
rel_path = str(fp.relative_to(path)) if path.is_dir() else fp.name
197-
progress.update(task, description=f"Scanning {rel_path}")
192+
findings = await _analyze_file(fp, rel_path, provider, cache)
193+
all_findings.extend(findings)
194+
else:
195+
with Progress(
196+
SpinnerColumn(),
197+
TextColumn("[progress.description]{task.description}"),
198+
console=console,
199+
) as progress:
200+
task = progress.add_task("Scanning...", total=len(files))
201+
202+
for fp in files:
203+
rel_path = str(fp.relative_to(path)) if path.is_dir() else fp.name
204+
progress.update(task, description=f"Scanning {rel_path}")
205+
206+
findings = await _analyze_file(fp, rel_path, provider, cache)
207+
all_findings.extend(findings)
208+
209+
progress.advance(task)
210+
else:
211+
# Parallel path
212+
semaphore = asyncio.Semaphore(concurrency)
213+
results_lock = asyncio.Lock()
214+
215+
progress_ctx = (
216+
None
217+
if quiet
218+
else Progress(
219+
SpinnerColumn(),
220+
TextColumn("[progress.description]{task.description}"),
221+
console=console,
222+
)
223+
)
224+
225+
task_id = None
226+
if progress_ctx is not None:
227+
progress_ctx.start()
228+
task_id = progress_ctx.add_task("Scanning...", total=len(files))
198229

230+
async def _process(fp: Path) -> None:
231+
rel_path = str(fp.relative_to(path)) if path.is_dir() else fp.name
232+
async with semaphore:
199233
findings = await _analyze_file(fp, rel_path, provider, cache)
234+
async with results_lock:
200235
all_findings.extend(findings)
236+
if progress_ctx is not None and task_id is not None:
237+
progress_ctx.advance(task_id)
238+
239+
await asyncio.gather(*[_process(fp) for fp in files])
201240

202-
progress.advance(task)
241+
if progress_ctx is not None:
242+
progress_ctx.stop()
203243

204244
duration = time.monotonic() - start_time
205245

@@ -228,11 +268,12 @@ def run_scan_sync(
228268
quiet: bool = False,
229269
cache_dir: Path | None = None,
230270
no_cache: bool = False,
271+
parallel: int = 1,
231272
) -> ScanResult:
232273
"""Synchronous wrapper for the async scan function."""
233274
return asyncio.run(
234275
scan(
235276
path, provider, include, exclude, max_file_size_kb,
236-
min_severity, quiet, cache_dir, no_cache,
277+
min_severity, quiet, cache_dir, no_cache, parallel,
237278
)
238279
)

tests/test_cli.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -46,6 +46,7 @@ def fake_run_scan_sync( # type: ignore[no-untyped-def]
4646
quiet: bool = False,
4747
cache_dir: Path | None = None,
4848
no_cache: bool = False,
49+
parallel: int = 1,
4950
) -> ScanResult:
5051
captured["include"] = include
5152
captured["exclude"] = exclude
@@ -150,6 +151,7 @@ def fake_run_scan_sync( # type: ignore[no-untyped-def]
150151
quiet: bool = False,
151152
cache_dir: Path | None = None,
152153
no_cache: bool = False,
154+
parallel: int = 1,
153155
) -> ScanResult:
154156
captured["quiet"] = quiet
155157
return ScanResult(
@@ -195,6 +197,7 @@ def fake_run_scan_sync( # type: ignore[no-untyped-def]
195197
quiet: bool = False,
196198
cache_dir: Path | None = None,
197199
no_cache: bool = False,
200+
parallel: int = 1,
198201
) -> ScanResult:
199202
captured["include"] = include
200203
captured["max_file_size_kb"] = max_file_size_kb

tests/test_scanner.py

Lines changed: 43 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -192,3 +192,46 @@ def test_scan_quiet_mode(self, tmp_path: Path) -> None:
192192
result = asyncio.run(scan(tmp_path, provider, quiet=True))
193193
assert result.files_scanned == 1
194194
assert result.findings == []
195+
196+
def test_parallel_scan_collects_all_findings(self, tmp_path: Path) -> None:
197+
for i in range(5):
198+
(tmp_path / f"mod{i}.py").write_text(f"x = {i}")
199+
finding = Finding(
200+
file_path="placeholder",
201+
line_start=1,
202+
severity=Severity.MEDIUM,
203+
title="Test finding",
204+
description="d",
205+
recommendation="r",
206+
)
207+
provider = MockProvider(findings=[finding])
208+
result = asyncio.run(scan(tmp_path, provider, quiet=True, parallel=3))
209+
assert result.files_scanned == 5
210+
assert len(result.findings) == 5
211+
212+
def test_parallel_scan_with_error_provider(self, tmp_path: Path) -> None:
213+
(tmp_path / "a.py").write_text("x = 1")
214+
(tmp_path / "b.py").write_text("y = 2")
215+
provider = ErrorProvider()
216+
result = asyncio.run(scan(tmp_path, provider, quiet=True, parallel=2))
217+
assert result.files_scanned == 2
218+
assert result.findings == []
219+
220+
def test_parallel_scan_severity_filter(self, tmp_path: Path) -> None:
221+
for name in ("a.py", "b.py", "c.py"):
222+
(tmp_path / name).write_text("code")
223+
findings = [
224+
Finding(
225+
file_path="x",
226+
line_start=1,
227+
severity=Severity.LOW,
228+
title="Low",
229+
description="d",
230+
recommendation="r",
231+
),
232+
]
233+
provider = MockProvider(findings=findings)
234+
result = asyncio.run(
235+
scan(tmp_path, provider, quiet=True, parallel=2, min_severity="high")
236+
)
237+
assert result.findings == []

0 commit comments

Comments
 (0)