Skip to content

Commit 58f69f9

Browse files
committed
CBOM finalizations
1 parent f781eee commit 58f69f9

File tree

7 files changed

+237
-205
lines changed

7 files changed

+237
-205
lines changed

README.md

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
<img src="cipherscope.png" alt="CipherScope Logo" width="350" height="350">
55
</div>
66

7-
Fast cryptographic inventory generator. Scans codebases to identify cryptographic algorithms and assess quantum resistance.
7+
Fast cryptographic inventory generator that creates Minimal Viable Cryptographic Bill of Materials (MV-CBOM) documents. Scans codebases to identify cryptographic algorithms and certificates, and to assess post-quantum cryptography readiness.
88

99
## Quick Start
1010

@@ -39,9 +39,25 @@ cargo build --release
3939

4040
## Options
4141

42+
### Core Options
4243
- `--patterns PATH` - Custom patterns file (default: `patterns.toml`)
43-
- `--progress` - Show progress bar
44-
- `--deterministic` - Reproducible output for testing
44+
- `--progress` - Show progress bar during scanning
45+
- `--deterministic` - Reproducible output for testing/ground-truth generation
46+
- `--output FILE` - Output file for single-project CBOM (default: stdout)
47+
- `--recursive` - Generate MV-CBOMs for all discovered projects
48+
- `--output-dir DIR` - Output directory for recursive CBOMs
49+
50+
### Filtering & Performance
51+
- `--threads N` - Number of processing threads
52+
- `--max-file-size MB` - Maximum file size to scan (default: 2MB)
53+
- `--include-glob GLOB` - Include files matching glob pattern(s)
54+
- `--exclude-glob GLOB` - Exclude files matching glob pattern(s)
55+
56+
### Certificate Scanning
57+
- `--skip-certificates` - Skip certificate scanning during CBOM generation
58+
59+
### Configuration
60+
- `--print-config` - Print merged patterns/config and exit
4561

4662
## Languages Supported
4763

@@ -81,4 +97,4 @@ cargo test
8197

8298
## License
8399

84-
MIT
100+
MIT

crates/cbom-generator/src/algorithm_detector.rs

Lines changed: 49 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -51,43 +51,40 @@ impl AlgorithmDetector {
5151
scan_path: &Path,
5252
findings: &[Finding],
5353
) -> Result<Vec<CryptoAsset>> {
54+
let registry = match &self.registry {
55+
Some(registry) => registry,
56+
None => return Ok(Vec::new()),
57+
};
58+
5459
let mut algorithms = Vec::new();
5560
let mut seen_algorithms = HashSet::new();
5661

57-
if let Some(registry) = &self.registry {
58-
// Extract algorithms from findings using registry patterns
59-
for finding in findings {
60-
if let Some(algorithm_assets) =
61-
self.extract_algorithms_from_finding_with_registry(finding, registry)?
62-
{
63-
for asset in algorithm_assets {
64-
let key = self.create_deduplication_key(&asset);
65-
if seen_algorithms.insert(key) {
66-
algorithms.push(asset);
67-
}
68-
}
69-
}
70-
}
71-
72-
// Only perform deep static analysis if we have a reasonable number of findings
73-
// Skip for large codebases to avoid performance issues
74-
if findings.len() < 1000 {
75-
let additional_algorithms =
76-
self.perform_deep_static_analysis_with_registry(scan_path, registry)?;
77-
for asset in additional_algorithms {
62+
// Extract algorithms from findings using registry patterns
63+
for finding in findings {
64+
if let Some(algorithm_assets) =
65+
self.extract_algorithms_from_finding_with_registry(finding, registry)?
66+
{
67+
for asset in algorithm_assets {
7868
let key = self.create_deduplication_key(&asset);
7969
if seen_algorithms.insert(key) {
8070
algorithms.push(asset);
8171
}
8272
}
8373
}
84-
} else {
85-
// No registry available; skip instead of using static fallbacks.
74+
}
75+
76+
// Always perform deep static analysis regardless of findings count
77+
let additional_algorithms =
78+
self.perform_deep_static_analysis_with_registry(scan_path, registry)?;
79+
for asset in additional_algorithms {
80+
let key = self.create_deduplication_key(&asset);
81+
if seen_algorithms.insert(key) {
82+
algorithms.push(asset);
83+
}
8684
}
8785

8886
// Merge duplicate algorithms with different parameter specificity
89-
let merged_algorithms = self.merge_algorithm_assets(algorithms);
90-
Ok(merged_algorithms)
87+
Ok(self.merge_algorithm_assets(algorithms))
9188
}
9289

9390
/// Extract algorithms from finding using pattern registry
@@ -119,7 +116,7 @@ impl AlgorithmDetector {
119116
parameters,
120117
Some(finding.library.clone()),
121118
Some(AssetEvidence {
122-
file: finding.file.display().to_string(),
119+
file: finding.file.to_string_lossy().to_string(),
123120
detector_id: finding.detector_id.clone(),
124121
line: finding.span.line,
125122
column: finding.span.column,
@@ -258,20 +255,18 @@ impl AlgorithmDetector {
258255
) -> Result<Vec<CryptoAsset>> {
259256
let mut algorithms = Vec::new();
260257

261-
// Only analyze a limited number of files to avoid performance issues
262-
const MAX_FILES_TO_ANALYZE: usize = 100;
263-
let mut files_analyzed = 0;
258+
// Analyze files for parameter extraction - removed arbitrary limits for comprehensive scanning
259+
let mut _files_analyzed = 0;
264260

265261
// Walk through source files for parameter extraction
266262
for entry in WalkDir::new(scan_path)
267-
.max_depth(5) // Limit depth to avoid deep recursion
263+
.max_depth(20) // Support very deep directory structures
268264
.into_iter()
269265
.filter_map(|e| e.ok())
270266
.filter(|e| e.file_type().is_file())
271267
{
272-
if files_analyzed >= MAX_FILES_TO_ANALYZE {
273-
break; // Stop after analyzing enough files
274-
}
268+
// Note: Removed MAX_FILES_TO_ANALYZE limit for comprehensive cryptographic analysis
269+
// In large codebases, crypto usage can be deeply nested and limits can miss important findings
275270

276271
let path = entry.path();
277272

@@ -285,7 +280,7 @@ impl AlgorithmDetector {
285280
) {
286281
if let Ok(mut extracted) = self.analyze_file_with_registry(path, registry) {
287282
algorithms.append(&mut extracted);
288-
files_analyzed += 1;
283+
_files_analyzed += 1;
289284
}
290285
}
291286
}
@@ -373,18 +368,33 @@ impl AlgorithmDetector {
373368
Ok(algorithms)
374369
}
375370

376-
/// Create a proper deduplication key based on algorithm properties, not bom_ref
371+
/// Create a deduplication key based on algorithm properties AND evidence location
372+
/// This ensures that the same algorithm detected at different locations (file, line, column) is reported separately
377373
fn create_deduplication_key(&self, asset: &CryptoAsset) -> String {
378374
match &asset.asset_properties {
379375
AssetProperties::Algorithm(props) => {
380-
// Deduplicate by algorithm name, primitive, and source library to avoid merging
381-
// different libraries' detections of the same algorithm (e.g., OpenSSL vs CommonCrypto).
376+
// Include evidence location to allow multiple instances from different files/locations
382377
let library = asset.source_library.as_deref().unwrap_or("unknown-library");
378+
let params_key = props
379+
.parameter_set
380+
.as_ref()
381+
.map(|p| format!("{:?}", p))
382+
.unwrap_or_else(|| "no-params".to_string());
383+
384+
// Include file, line, and column information so that the same algorithm detected at different locations yields distinct keys
385+
let evidence_key = if let Some(evidence) = &asset.evidence {
386+
format!("{}:{}:{}", evidence.file, evidence.line, evidence.column)
387+
} else {
388+
"no-evidence".to_string()
389+
};
390+
383391
format!(
384-
"{}:{}:{}",
392+
"{}:{}:{}:{}:{}",
385393
asset.name.as_deref().unwrap_or("unknown"),
386394
props.primitive as u8,
387-
library
395+
library,
396+
params_key,
397+
evidence_key
388398
)
389399
}
390400
_ => format!(

crates/cbom-generator/src/certificate_parser.rs

Lines changed: 21 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -191,7 +191,7 @@ impl CertificateParser {
191191
}
192192
}
193193

194-
/// Map signature algorithm OID to algorithm properties
194+
/// Map signature algorithm OID to algorithm properties (for tests)
195195
#[cfg(test)]
196196
fn map_signature_algorithm(
197197
&self,
@@ -202,113 +202,79 @@ impl CertificateParser {
202202
u8,
203203
Option<serde_json::Value>,
204204
) {
205-
match oid {
205+
let (name, primitive, level) = match oid {
206206
// RSA signature algorithms - all vulnerable to quantum attacks
207-
"1.2.840.113549.1.1.1" => (
208-
"RSA".to_string(),
209-
crate::CryptographicPrimitive::Signature,
210-
0,
211-
None,
212-
),
213-
"1.2.840.113549.1.1.4" => (
214-
"RSA with MD5".to_string(),
215-
crate::CryptographicPrimitive::Signature,
216-
0,
217-
None,
218-
),
207+
"1.2.840.113549.1.1.1" => ("RSA", crate::CryptographicPrimitive::Signature, 0),
208+
"1.2.840.113549.1.1.4" => ("RSA with MD5", crate::CryptographicPrimitive::Signature, 0),
219209
"1.2.840.113549.1.1.5" => (
220-
"RSA with SHA-1".to_string(),
210+
"RSA with SHA-1",
221211
crate::CryptographicPrimitive::Signature,
222212
0,
223-
None,
224213
),
225214
"1.2.840.113549.1.1.11" => (
226-
"RSA with SHA-256".to_string(),
215+
"RSA with SHA-256",
227216
crate::CryptographicPrimitive::Signature,
228217
0,
229-
None,
230218
),
231219
"1.2.840.113549.1.1.12" => (
232-
"RSA with SHA-384".to_string(),
220+
"RSA with SHA-384",
233221
crate::CryptographicPrimitive::Signature,
234222
0,
235-
None,
236223
),
237224
"1.2.840.113549.1.1.13" => (
238-
"RSA with SHA-512".to_string(),
225+
"RSA with SHA-512",
239226
crate::CryptographicPrimitive::Signature,
240227
0,
241-
None,
242228
),
243229

244230
// ECDSA signature algorithms - all vulnerable to quantum attacks
245231
"1.2.840.10045.4.1" => (
246-
"ECDSA with SHA-1".to_string(),
232+
"ECDSA with SHA-1",
247233
crate::CryptographicPrimitive::Signature,
248234
0,
249-
None,
250235
),
251236
"1.2.840.10045.4.3.1" => (
252-
"ECDSA with SHA-224".to_string(),
237+
"ECDSA with SHA-224",
253238
crate::CryptographicPrimitive::Signature,
254239
0,
255-
None,
256240
),
257241
"1.2.840.10045.4.3.2" => (
258-
"ECDSA with SHA-256".to_string(),
242+
"ECDSA with SHA-256",
259243
crate::CryptographicPrimitive::Signature,
260244
0,
261-
None,
262245
),
263246
"1.2.840.10045.4.3.3" => (
264-
"ECDSA with SHA-384".to_string(),
247+
"ECDSA with SHA-384",
265248
crate::CryptographicPrimitive::Signature,
266249
0,
267-
None,
268250
),
269251
"1.2.840.10045.4.3.4" => (
270-
"ECDSA with SHA-512".to_string(),
252+
"ECDSA with SHA-512",
271253
crate::CryptographicPrimitive::Signature,
272254
0,
273-
None,
274255
),
275256

276257
// EdDSA - also vulnerable to quantum attacks
277-
"1.3.101.112" => (
278-
"Ed25519".to_string(),
279-
crate::CryptographicPrimitive::Signature,
280-
0,
281-
None,
282-
),
283-
"1.3.101.113" => (
284-
"Ed448".to_string(),
285-
crate::CryptographicPrimitive::Signature,
286-
0,
287-
None,
288-
),
258+
"1.3.101.112" => ("Ed25519", crate::CryptographicPrimitive::Signature, 0),
259+
"1.3.101.113" => ("Ed448", crate::CryptographicPrimitive::Signature, 0),
289260

290261
// DSA - vulnerable to quantum attacks
291-
"1.2.840.10040.4.1" => (
292-
"DSA".to_string(),
293-
crate::CryptographicPrimitive::Signature,
294-
0,
295-
None,
296-
),
262+
"1.2.840.10040.4.1" => ("DSA", crate::CryptographicPrimitive::Signature, 0),
297263
"1.2.840.10040.4.3" => (
298-
"DSA with SHA-1".to_string(),
264+
"DSA with SHA-1",
299265
crate::CryptographicPrimitive::Signature,
300266
0,
301-
None,
302267
),
303268

304269
// Default case for unknown algorithms
305270
_ => (
306-
format!("Unknown Algorithm (OID: {})", oid),
271+
"Unknown Algorithm",
307272
crate::CryptographicPrimitive::Signature,
308273
0,
309-
None,
310274
),
311-
}
275+
};
276+
277+
(name.to_string(), primitive, level, None)
312278
}
313279

314280
/// Convert ASN.1 time to Chrono DateTime

0 commit comments

Comments
 (0)