Skip to content

Commit fa0ddb3

Browse files
cursoragent and script3r committed
Refactor: Improve crypto asset detection and dependency analysis
This commit refactors the crypto asset detection and dependency analysis logic. It includes improvements to the algorithm detector, certificate parser, and dependency analyzer. The project parser also receives updates for better handling of various project types. These changes enhance the accuracy and robustness of the MV-CBOM generation process.

Co-authored-by: script3r <[email protected]>
1 parent ef33df3 commit fa0ddb3

File tree

12 files changed

+871
-476
lines changed

12 files changed

+871
-476
lines changed

crates/cbom-generator/src/algorithm_detector.rs

Lines changed: 93 additions & 40 deletions
Original file line number | Diff line number | Diff line change
@@ -1,18 +1,15 @@
11
//! Algorithm detection functionality for extracting cryptographic algorithms from source code
22
33
use anyhow::{Context, Result};
4-
use regex::Regex;
5-
use scanner_core::{Finding, PatternRegistry, CompiledAlgorithm};
4+
use scanner_core::{CompiledAlgorithm, Finding, PatternRegistry};
65
use serde_json::json;
76
use std::collections::{HashMap, HashSet};
87
use std::fs;
98
use std::path::Path;
109
use uuid::Uuid;
1110
use walkdir::WalkDir;
1211

13-
use crate::{
14-
AlgorithmProperties, AssetProperties, AssetType, CryptographicPrimitive, CryptoAsset,
15-
};
12+
use crate::{AlgorithmProperties, AssetProperties, AssetType, CryptoAsset, CryptographicPrimitive};
1613

1714
/// Detector for cryptographic algorithms in source code
1815
pub struct AlgorithmDetector {
@@ -22,9 +19,7 @@ pub struct AlgorithmDetector {
2219

2320
impl AlgorithmDetector {
2421
pub fn new() -> Self {
25-
Self {
26-
registry: None,
27-
}
22+
Self { registry: None }
2823
}
2924

3025
pub fn with_registry(registry: std::sync::Arc<PatternRegistry>) -> Self {
@@ -34,17 +29,26 @@ impl AlgorithmDetector {
3429
}
3530

3631
/// Detect algorithms from scanner findings using pattern registry
37-
pub fn detect_algorithms(&self, scan_path: &Path, findings: &[Finding]) -> Result<Vec<CryptoAsset>> {
32+
pub fn detect_algorithms(
33+
&self,
34+
scan_path: &Path,
35+
findings: &[Finding],
36+
) -> Result<Vec<CryptoAsset>> {
3837
let mut algorithms = Vec::new();
3938
let mut seen_algorithms = HashSet::new();
4039

4140
if let Some(registry) = &self.registry {
4241
// Extract algorithms from findings using registry patterns
4342
for finding in findings {
44-
if let Some(algorithm_assets) = self.extract_algorithms_from_finding_with_registry(finding, registry)? {
43+
if let Some(algorithm_assets) =
44+
self.extract_algorithms_from_finding_with_registry(finding, registry)?
45+
{
4546
for asset in algorithm_assets {
46-
let key = format!("{}:{}", asset.name.as_ref().unwrap_or(&"unknown".to_string()),
47-
asset.bom_ref);
47+
let key = format!(
48+
"{}:{}",
49+
asset.name.as_ref().unwrap_or(&"unknown".to_string()),
50+
asset.bom_ref
51+
);
4852
if seen_algorithms.insert(key) {
4953
algorithms.push(asset);
5054
}
@@ -53,21 +57,30 @@ impl AlgorithmDetector {
5357
}
5458

5559
// Perform additional static analysis for parameter extraction
56-
let additional_algorithms = self.perform_deep_static_analysis_with_registry(scan_path, registry)?;
60+
let additional_algorithms =
61+
self.perform_deep_static_analysis_with_registry(scan_path, registry)?;
5762
for asset in additional_algorithms {
58-
let key = format!("{}:{}", asset.name.as_ref().unwrap_or(&"unknown".to_string()),
59-
asset.bom_ref);
63+
let key = format!(
64+
"{}:{}",
65+
asset.name.as_ref().unwrap_or(&"unknown".to_string()),
66+
asset.bom_ref
67+
);
6068
if seen_algorithms.insert(key) {
6169
algorithms.push(asset);
6270
}
6371
}
6472
} else {
6573
// Fallback to hardcoded detection if no registry available
6674
for finding in findings {
67-
if let Some(algorithm_assets) = self.extract_algorithms_from_finding_fallback(finding)? {
75+
if let Some(algorithm_assets) =
76+
self.extract_algorithms_from_finding_fallback(finding)?
77+
{
6878
for asset in algorithm_assets {
69-
let key = format!("{}:{}", asset.name.as_ref().unwrap_or(&"unknown".to_string()),
70-
asset.bom_ref);
79+
let key = format!(
80+
"{}:{}",
81+
asset.name.as_ref().unwrap_or(&"unknown".to_string()),
82+
asset.bom_ref
83+
);
7184
if seen_algorithms.insert(key) {
7285
algorithms.push(asset);
7386
}
@@ -80,7 +93,11 @@ impl AlgorithmDetector {
8093
}
8194

8295
/// Extract algorithms from finding using pattern registry
83-
fn extract_algorithms_from_finding_with_registry(&self, finding: &Finding, registry: &PatternRegistry) -> Result<Option<Vec<CryptoAsset>>> {
96+
fn extract_algorithms_from_finding_with_registry(
97+
&self,
98+
finding: &Finding,
99+
registry: &PatternRegistry,
100+
) -> Result<Option<Vec<CryptoAsset>>> {
84101
let mut algorithms = Vec::new();
85102

86103
// Find the library in the registry
@@ -91,7 +108,7 @@ impl AlgorithmDetector {
91108
if self.symbol_matches_algorithm(&finding.symbol, algorithm) {
92109
// Extract parameters from the finding
93110
let parameters = self.extract_parameters_from_finding(finding, algorithm)?;
94-
111+
95112
// Create the algorithm asset
96113
let asset = self.create_algorithm_asset_from_spec(algorithm, parameters)?;
97114
algorithms.push(asset);
@@ -107,7 +124,10 @@ impl AlgorithmDetector {
107124
}
108125

109126
/// Fallback algorithm extraction for when no registry is available
110-
fn extract_algorithms_from_finding_fallback(&self, finding: &Finding) -> Result<Option<Vec<CryptoAsset>>> {
127+
fn extract_algorithms_from_finding_fallback(
128+
&self,
129+
finding: &Finding,
130+
) -> Result<Option<Vec<CryptoAsset>>> {
111131
// Simplified fallback logic
112132
let symbol = &finding.symbol.to_lowercase();
113133
let mut algorithms = Vec::new();
@@ -137,26 +157,33 @@ impl AlgorithmDetector {
137157
}
138158

139159
// Check if symbol matches any of the algorithm's symbol patterns
140-
algorithm.symbol_patterns.iter().any(|pattern| pattern.is_match(symbol))
160+
algorithm
161+
.symbol_patterns
162+
.iter()
163+
.any(|pattern| pattern.is_match(symbol))
141164
}
142165

143166
/// Extract parameters from finding using algorithm's parameter patterns
144-
fn extract_parameters_from_finding(&self, finding: &Finding, algorithm: &CompiledAlgorithm) -> Result<HashMap<String, serde_json::Value>> {
167+
fn extract_parameters_from_finding(
168+
&self,
169+
finding: &Finding,
170+
algorithm: &CompiledAlgorithm,
171+
) -> Result<HashMap<String, serde_json::Value>> {
145172
let mut parameters = HashMap::new();
146173

147174
// Extract parameters from symbol
148175
for param_pattern in &algorithm.parameter_patterns {
149176
if let Some(captures) = param_pattern.pattern.captures(&finding.symbol) {
150177
if let Some(value_match) = captures.get(1) {
151178
let value_str = value_match.as_str();
152-
179+
153180
// Try to parse as number first, then as string
154181
let value = if let Ok(num) = value_str.parse::<u64>() {
155182
json!(num)
156183
} else {
157184
json!(value_str)
158185
};
159-
186+
160187
parameters.insert(param_pattern.name.clone(), value);
161188
}
162189
} else if let Some(default) = &param_pattern.default_value {
@@ -169,9 +196,13 @@ impl AlgorithmDetector {
169196
}
170197

171198
/// Create algorithm asset from algorithm spec and extracted parameters
172-
fn create_algorithm_asset_from_spec(&self, algorithm: &CompiledAlgorithm, parameters: HashMap<String, serde_json::Value>) -> Result<CryptoAsset> {
199+
fn create_algorithm_asset_from_spec(
200+
&self,
201+
algorithm: &CompiledAlgorithm,
202+
parameters: HashMap<String, serde_json::Value>,
203+
) -> Result<CryptoAsset> {
173204
let primitive = self.parse_primitive(&algorithm.primitive)?;
174-
205+
175206
let parameter_set = if parameters.is_empty() {
176207
None
177208
} else {
@@ -206,7 +237,11 @@ impl AlgorithmDetector {
206237
}
207238

208239
/// Perform deep static analysis using registry patterns
209-
fn perform_deep_static_analysis_with_registry(&self, scan_path: &Path, registry: &PatternRegistry) -> Result<Vec<CryptoAsset>> {
240+
fn perform_deep_static_analysis_with_registry(
241+
&self,
242+
scan_path: &Path,
243+
registry: &PatternRegistry,
244+
) -> Result<Vec<CryptoAsset>> {
210245
let mut algorithms = Vec::new();
211246

212247
// Walk through source files for parameter extraction
@@ -216,9 +251,12 @@ impl AlgorithmDetector {
216251
.filter(|e| e.file_type().is_file())
217252
{
218253
let path = entry.path();
219-
254+
220255
if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
221-
if matches!(ext, "rs" | "java" | "go" | "py" | "c" | "cpp" | "swift" | "js" | "php") {
256+
if matches!(
257+
ext,
258+
"rs" | "java" | "go" | "py" | "c" | "cpp" | "swift" | "js" | "php"
259+
) {
222260
if let Ok(mut extracted) = self.analyze_file_with_registry(path, registry) {
223261
algorithms.append(&mut extracted);
224262
}
@@ -230,7 +268,11 @@ impl AlgorithmDetector {
230268
}
231269

232270
/// Analyze a source file using registry patterns
233-
fn analyze_file_with_registry(&self, file_path: &Path, registry: &PatternRegistry) -> Result<Vec<CryptoAsset>> {
271+
fn analyze_file_with_registry(
272+
&self,
273+
file_path: &Path,
274+
registry: &PatternRegistry,
275+
) -> Result<Vec<CryptoAsset>> {
234276
let content = fs::read_to_string(file_path)
235277
.with_context(|| format!("Failed to read file: {}", file_path.display()))?;
236278

@@ -243,7 +285,7 @@ impl AlgorithmDetector {
243285
for symbol_pattern in &algorithm.symbol_patterns {
244286
for symbol_match in symbol_pattern.find_iter(&content) {
245287
let symbol = symbol_match.as_str();
246-
288+
247289
// Extract parameters from the matched symbol
248290
let mut parameters = HashMap::new();
249291
for param_pattern in &algorithm.parameter_patterns {
@@ -347,16 +389,25 @@ mod tests {
347389
#[test]
348390
fn test_primitive_parsing() {
349391
let detector = AlgorithmDetector::new();
350-
351-
assert!(matches!(detector.parse_primitive("signature").unwrap(), CryptographicPrimitive::Signature));
352-
assert!(matches!(detector.parse_primitive("aead").unwrap(), CryptographicPrimitive::AuthenticatedEncryption));
353-
assert!(matches!(detector.parse_primitive("hash").unwrap(), CryptographicPrimitive::Hash));
392+
393+
assert!(matches!(
394+
detector.parse_primitive("signature").unwrap(),
395+
CryptographicPrimitive::Signature
396+
));
397+
assert!(matches!(
398+
detector.parse_primitive("aead").unwrap(),
399+
CryptographicPrimitive::AuthenticatedEncryption
400+
));
401+
assert!(matches!(
402+
detector.parse_primitive("hash").unwrap(),
403+
CryptographicPrimitive::Hash
404+
));
354405
}
355406

356407
#[test]
357408
fn test_fallback_algorithm_extraction() {
358409
let detector = AlgorithmDetector::new();
359-
410+
360411
let finding = Finding {
361412
language: Language::Rust,
362413
library: "unknown".to_string(),
@@ -367,11 +418,13 @@ mod tests {
367418
detector_id: "detector-rust".to_string(),
368419
};
369420

370-
let algorithms = detector.extract_algorithms_from_finding_fallback(&finding).unwrap();
421+
let algorithms = detector
422+
.extract_algorithms_from_finding_fallback(&finding)
423+
.unwrap();
371424
assert!(algorithms.is_some());
372-
425+
373426
let algos = algorithms.unwrap();
374427
assert_eq!(algos.len(), 1);
375428
assert_eq!(algos[0].name, Some("RSA".to_string()));
376429
}
377-
}
430+
}

0 commit comments

Comments
 (0)