Skip to content

Commit 31458a7

Browse files
cursoragentscript3r
andcommitted
feat: Add recursive CBOM generation and support for nested projects
Co-authored-by: script3r <[email protected]>
1 parent 668bbcf commit 31458a7

File tree

26 files changed

+675
-25
lines changed

26 files changed

+675
-25
lines changed

README.md

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,11 @@ cargo build --release
1818
Generate MV-CBOM (Cryptographic Bill of Materials):
1919

2020
```bash
21+
# Single project CBOM
2122
./target/release/cipherscope . --cbom
23+
24+
# Recursive CBOM generation for all discovered projects
25+
./target/release/cipherscope . --cbom-recursive
2226
```
2327

2428
JSONL and SARIF:
@@ -30,6 +34,7 @@ JSONL and SARIF:
3034

3135
Key flags:
3236
- `--cbom`: generate MV-CBOM (Minimal Viable Cryptographic Bill of Materials)
37+
- `--cbom-recursive`: generate MV-CBOMs recursively for all discovered projects
3338
- `--threads N`: set thread pool size
3439
- `--max-file-size MB`: skip large files (default 2)
3540
- `--patterns PATH`: specify patterns file (default: `patterns.toml`)
@@ -55,6 +60,7 @@ The MV-CBOM includes:
5560
- **Cryptographic Assets**: Algorithms, certificates, and related crypto material with NIST security levels
5661
- **Dependency Relationships**: Distinguishes between "uses" (actively called) vs "implements" (available but unused)
5762
- **Parameter Extraction**: Key sizes, curves, and other algorithm-specific parameters
63+
- **Recursive Project Discovery**: Automatically discovers and analyzes nested projects (BUCK, Bazel, Maven modules, etc.)
5864

5965
Example MV-CBOM snippet:
6066
```json
@@ -194,12 +200,13 @@ The MV-CBOM generation is implemented in the `cbom-generator` crate with modular
194200
- Extensible: new algorithms added by editing patterns, not code
195201

196202
The MV-CBOM pipeline:
197-
1. **Static Analysis**: Scanner finds cryptographic usage patterns using `patterns.toml`
198-
2. **Algorithm Detection**: **Pattern-driven** extraction of algorithms and parameters
199-
3. **Certificate Parsing**: Discovers and analyzes X.509 certificates in the project
200-
4. **Project Analysis**: Multi-language dependency parsing (Cargo, Maven, go.mod, Makefile, Bazel, BUCK, etc.)
201-
5. **Dependency Analysis**: Correlates project dependencies with actual code usage
202-
6. **CBOM Generation**: Produces standards-compliant JSON with NIST security levels
203+
1. **Project Discovery**: **Recursive** scanning for project files (BUILD, pom.xml, Cargo.toml, etc.)
204+
2. **Static Analysis**: Scanner finds cryptographic usage patterns using `patterns.toml`
205+
3. **Algorithm Detection**: **Pattern-driven** extraction of algorithms and parameters
206+
4. **Certificate Parsing**: Discovers and analyzes X.509 certificates in each project
207+
5. **Project Analysis**: Multi-language dependency parsing (Cargo, Maven, go.mod, Makefile, Bazel, BUCK, etc.)
208+
6. **Dependency Analysis**: Correlates project dependencies with actual code usage per project
209+
7. **CBOM Generation**: Produces standards-compliant JSON with NIST security levels (one per project)
203210

204211
### Tests & Benchmarks
205212

@@ -256,6 +263,14 @@ cat fixtures/rust/rsa-vulnerable/mv-cbom.json | jq '.cryptoAssets[] | select(.as
256263

257264
# Test certificate parsing
258265
./target/release/cipherscope fixtures/certificates/x509-rsa-ecdsa --cbom
266+
267+
# Test recursive project discovery
268+
./target/release/cipherscope fixtures/buck-nested --cbom-recursive
269+
./target/release/cipherscope fixtures/bazel-nested --cbom-recursive
270+
271+
# Verify multiple CBOMs generated
272+
find fixtures/buck-nested -name "mv-cbom.json" | wc -l # Should show 3
273+
find fixtures/bazel-nested -name "mv-cbom.json" | wc -l # Should show 4
259274
```
260275

261276
Benchmark scan throughput on test fixtures:

crates/cbom-generator/src/lib.rs

Lines changed: 85 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ use chrono::{DateTime, Utc};
99
use scanner_core::{Finding, PatternRegistry};
1010
use serde::{Deserialize, Serialize};
1111
use std::fs;
12-
use std::path::Path;
12+
use std::path::{Path, PathBuf};
1313
use std::sync::Arc;
1414
use uuid::Uuid;
1515

@@ -211,7 +211,7 @@ impl CbomGenerator {
211211
}
212212
}
213213

214-
/// Generate an MV-CBOM for the given directory
214+
/// Generate an MV-CBOM for the given directory (single project)
215215
pub fn generate_cbom(&self, scan_path: &Path, findings: &[Finding]) -> Result<MvCbom> {
216216
let scan_path = scan_path.canonicalize()
217217
.with_context(|| format!("Failed to canonicalize path: {}", scan_path.display()))?;
@@ -267,6 +267,76 @@ impl CbomGenerator {
267267
Ok(cbom)
268268
}
269269

270+
/// Generate MV-CBOMs for all projects discovered recursively
271+
pub fn generate_cboms_recursive(&self, scan_path: &Path, findings: &[Finding]) -> Result<Vec<(PathBuf, MvCbom)>> {
272+
let scan_path = scan_path.canonicalize()
273+
.with_context(|| format!("Failed to canonicalize path: {}", scan_path.display()))?;
274+
275+
// Discover all projects recursively
276+
let discovered_projects = self.project_parser.discover_projects(&scan_path)?;
277+
278+
let mut cboms = Vec::new();
279+
280+
for (project_path, project_info, project_dependencies) in discovered_projects {
281+
// Filter findings relevant to this specific project
282+
let project_findings: Vec<Finding> = findings.iter()
283+
.filter(|finding| {
284+
// Check if the finding's file is within this project's directory
285+
finding.file.starts_with(&project_path)
286+
})
287+
.cloned()
288+
.collect();
289+
290+
// Create component info from parsed project information
291+
let component_info = ComponentInfo {
292+
name: project_info.name,
293+
version: project_info.version,
294+
path: project_path.display().to_string(),
295+
};
296+
297+
// Parse certificates in this project directory
298+
let certificates = self.certificate_parser.parse_certificates(&project_path)?;
299+
300+
// Detect algorithms from findings and static analysis for this project
301+
let algorithms = self.algorithm_detector.detect_algorithms(&project_path, &project_findings)?;
302+
303+
// Analyze dependencies for this project
304+
let dependencies = self.dependency_analyzer.analyze_dependencies(
305+
&component_info,
306+
&algorithms,
307+
&certificates,
308+
&project_dependencies,
309+
&project_findings,
310+
)?;
311+
312+
// Build crypto assets list
313+
let mut crypto_assets = Vec::new();
314+
crypto_assets.extend(algorithms);
315+
crypto_assets.extend(certificates);
316+
317+
let cbom = MvCbom {
318+
bom_format: "MV-CBOM".to_string(),
319+
spec_version: "1.0".to_string(),
320+
serial_number: format!("urn:uuid:{}", Uuid::new_v4()),
321+
version: 1,
322+
metadata: CbomMetadata {
323+
component: component_info,
324+
timestamp: Utc::now(),
325+
tools: vec![ToolInfo {
326+
name: "cipherscope".to_string(),
327+
version: env!("CARGO_PKG_VERSION").to_string(),
328+
vendor: "CipherScope Contributors".to_string(),
329+
}],
330+
},
331+
crypto_assets,
332+
dependencies,
333+
};
334+
335+
cboms.push((project_path, cbom));
336+
}
337+
338+
Ok(cboms)
339+
}
270340

271341
/// Write the MV-CBOM to a JSON file
272342
pub fn write_cbom(&self, cbom: &MvCbom, output_path: &Path) -> Result<()> {
@@ -278,6 +348,19 @@ impl CbomGenerator {
278348

279349
Ok(())
280350
}
351+
352+
/// Write multiple MV-CBOMs to JSON files (one per project)
353+
pub fn write_cboms(&self, cboms: &[(PathBuf, MvCbom)]) -> Result<Vec<PathBuf>> {
354+
let mut written_files = Vec::new();
355+
356+
for (project_path, cbom) in cboms {
357+
let output_path = project_path.join("mv-cbom.json");
358+
self.write_cbom(cbom, &output_path)?;
359+
written_files.push(output_path);
360+
}
361+
362+
Ok(written_files)
363+
}
281364
}
282365

283366
impl Default for CbomGenerator {

crates/cbom-generator/src/project_parser.rs

Lines changed: 87 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,9 @@ use anyhow::{Context, Result};
44
use serde::Deserialize;
55
use std::collections::HashMap;
66
use std::fs;
7-
use std::path::Path;
7+
use std::path::{Path, PathBuf};
88
use regex::Regex;
9+
use walkdir::WalkDir;
910

1011
/// Information about a project dependency
1112
#[derive(Debug, Clone)]
@@ -84,7 +85,7 @@ impl ProjectParser {
8485
parser
8586
}
8687

87-
/// Parse project information and dependencies from a directory
88+
/// Parse project information and dependencies from a directory (non-recursive)
8889
pub fn parse_project(&self, scan_path: &Path) -> Result<(ProjectInfo, Vec<ProjectDependency>)> {
8990
// Try to detect project type by looking for common files
9091
if let Some((project_type, file_path)) = self.detect_project_type(scan_path) {
@@ -122,6 +123,90 @@ impl ProjectParser {
122123
}
123124
}
124125

126+
/// Recursively discover all projects in a directory tree
127+
pub fn discover_projects(&self, scan_path: &Path) -> Result<Vec<(PathBuf, ProjectInfo, Vec<ProjectDependency>)>> {
128+
let mut projects = Vec::new();
129+
130+
// Use walkdir to recursively scan for project files
131+
for entry in walkdir::WalkDir::new(scan_path)
132+
.into_iter()
133+
.filter_map(|e| e.ok())
134+
.filter(|e| e.file_type().is_file())
135+
{
136+
let file_path = entry.path();
137+
let dir_path = file_path.parent().unwrap_or(scan_path);
138+
139+
// Check if this file indicates a project root
140+
if let Some(project_type) = self.classify_project_file(file_path) {
141+
// Skip if we already found a project in this directory
142+
if projects.iter().any(|(path, _, _)| path == dir_path) {
143+
continue;
144+
}
145+
146+
// Parse the project
147+
match self.parse_project_from_file(file_path, dir_path, project_type) {
148+
Ok((project_info, dependencies)) => {
149+
projects.push((dir_path.to_path_buf(), project_info, dependencies));
150+
}
151+
Err(e) => {
152+
eprintln!("Warning: Failed to parse project at {}: {}", dir_path.display(), e);
153+
}
154+
}
155+
}
156+
}
157+
158+
// If no projects found, create a default one for the root
159+
if projects.is_empty() {
160+
let (project_info, dependencies) = self.parse_project(scan_path)?;
161+
projects.push((scan_path.to_path_buf(), project_info, dependencies));
162+
}
163+
164+
Ok(projects)
165+
}
166+
167+
/// Classify a file to determine if it's a project configuration file
168+
fn classify_project_file(&self, file_path: &Path) -> Option<ProjectType> {
169+
let file_name = file_path.file_name()?.to_str()?;
170+
171+
match file_name {
172+
"Cargo.toml" => Some(ProjectType::Cargo),
173+
"pom.xml" => Some(ProjectType::Maven),
174+
"build.gradle" | "build.gradle.kts" => Some(ProjectType::Gradle),
175+
"go.mod" => Some(ProjectType::GoMod),
176+
"package.json" => Some(ProjectType::NPM),
177+
"requirements.txt" => Some(ProjectType::Requirements),
178+
"Pipfile" => Some(ProjectType::Pipfile),
179+
"Gemfile" => Some(ProjectType::Gemfile),
180+
"composer.json" => Some(ProjectType::Composer),
181+
"Makefile" | "makefile" => Some(ProjectType::Makefile),
182+
"CMakeLists.txt" => Some(ProjectType::CMake),
183+
"WORKSPACE" | "BUILD" | "BUILD.bazel" => Some(ProjectType::Bazel),
184+
"BUCK" | ".buckconfig" => Some(ProjectType::Buck),
185+
name if name.ends_with(".podspec") => Some(ProjectType::Podspec),
186+
_ => None,
187+
}
188+
}
189+
190+
/// Parse a project from a specific file and directory
191+
fn parse_project_from_file(&self, file_path: &Path, dir_path: &Path, project_type: ProjectType) -> Result<(ProjectInfo, Vec<ProjectDependency>)> {
192+
match project_type {
193+
ProjectType::Cargo => self.parse_cargo_project(file_path),
194+
ProjectType::Maven => self.parse_maven_project(file_path),
195+
ProjectType::Gradle => self.parse_gradle_project(file_path),
196+
ProjectType::GoMod => self.parse_go_project(file_path),
197+
ProjectType::NPM => self.parse_npm_project(file_path),
198+
ProjectType::Requirements => self.parse_requirements_project(file_path, dir_path),
199+
ProjectType::Pipfile => self.parse_pipfile_project(file_path),
200+
ProjectType::Gemfile => self.parse_gemfile_project(file_path),
201+
ProjectType::Composer => self.parse_composer_project(file_path),
202+
ProjectType::Makefile => self.parse_makefile_project(file_path, dir_path),
203+
ProjectType::CMake => self.parse_cmake_project(file_path, dir_path),
204+
ProjectType::Podspec => self.parse_podspec_project(file_path),
205+
ProjectType::Bazel => self.parse_bazel_project(file_path, dir_path),
206+
ProjectType::Buck => self.parse_buck_project(file_path, dir_path),
207+
}
208+
}
209+
125210
/// Detect project type by scanning for common configuration files
126211
fn detect_project_type(&self, scan_path: &Path) -> Option<(ProjectType, std::path::PathBuf)> {
127212
let candidates = vec![

crates/cli/src/main.rs

Lines changed: 57 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,10 @@ struct Args {
2727
#[arg(long, action = ArgAction::SetTrue)]
2828
cbom: bool,
2929

30+
/// Generate MV-CBOMs recursively for all discovered projects
31+
#[arg(long, action = ArgAction::SetTrue)]
32+
cbom_recursive: bool,
33+
3034
/// Number of threads
3135
#[arg(long, value_name = "N")]
3236
threads: Option<usize>,
@@ -201,31 +205,69 @@ fn main() -> Result<()> {
201205
}
202206

203207
// Generate MV-CBOM if requested
204-
if args.cbom {
208+
if args.cbom || args.cbom_recursive {
205209
let cbom_generator = CbomGenerator::with_registry(reg.clone());
206210

207211
// Use the first path as the scan root for CBOM generation
208212
let default_path = PathBuf::from(".");
209213
let scan_path = args.paths.first().unwrap_or(&default_path);
210214

211-
match cbom_generator.generate_cbom(scan_path, &findings) {
212-
Ok(cbom) => {
213-
let output_path = scan_path.join("mv-cbom.json");
214-
match cbom_generator.write_cbom(&cbom, &output_path) {
215-
Ok(()) => {
216-
if !args.json {
217-
println!("MV-CBOM written to: {}", output_path.display());
218-
println!("Found {} cryptographic assets", cbom.crypto_assets.len());
219-
println!("Created {} dependency relationships", cbom.dependencies.len());
215+
if args.cbom_recursive {
216+
// Recursive CBOM generation for all discovered projects
217+
match cbom_generator.generate_cboms_recursive(scan_path, &findings) {
218+
Ok(cboms) => {
219+
match cbom_generator.write_cboms(&cboms) {
220+
Ok(written_files) => {
221+
if !args.json {
222+
println!("Generated {} MV-CBOMs for discovered projects:", cboms.len());
223+
let mut total_assets = 0;
224+
let mut total_dependencies = 0;
225+
226+
for (i, (project_path, cbom)) in cboms.iter().enumerate() {
227+
total_assets += cbom.crypto_assets.len();
228+
total_dependencies += cbom.dependencies.len();
229+
println!(" {}. {}: {} assets, {} dependencies",
230+
i + 1,
231+
project_path.display(),
232+
cbom.crypto_assets.len(),
233+
cbom.dependencies.len());
234+
}
235+
236+
println!("Total: {} cryptographic assets, {} dependency relationships",
237+
total_assets, total_dependencies);
238+
println!("Files written: {}", written_files.len());
239+
}
240+
}
241+
Err(e) => {
242+
eprintln!("Failed to write MV-CBOMs: {}", e);
220243
}
221244
}
222-
Err(e) => {
223-
eprintln!("Failed to write MV-CBOM: {}", e);
224-
}
245+
}
246+
Err(e) => {
247+
eprintln!("Failed to generate recursive MV-CBOMs: {}", e);
225248
}
226249
}
227-
Err(e) => {
228-
eprintln!("Failed to generate MV-CBOM: {}", e);
250+
} else {
251+
// Single CBOM generation
252+
match cbom_generator.generate_cbom(scan_path, &findings) {
253+
Ok(cbom) => {
254+
let output_path = scan_path.join("mv-cbom.json");
255+
match cbom_generator.write_cbom(&cbom, &output_path) {
256+
Ok(()) => {
257+
if !args.json {
258+
println!("MV-CBOM written to: {}", output_path.display());
259+
println!("Found {} cryptographic assets", cbom.crypto_assets.len());
260+
println!("Created {} dependency relationships", cbom.dependencies.len());
261+
}
262+
}
263+
Err(e) => {
264+
eprintln!("Failed to write MV-CBOM: {}", e);
265+
}
266+
}
267+
}
268+
Err(e) => {
269+
eprintln!("Failed to generate MV-CBOM: {}", e);
270+
}
229271
}
230272
}
231273
}

0 commit comments

Comments
 (0)