Skip to content

Commit e65fbd0

Browse files
cursoragent and script3r committed
Refactor: Use pattern registry for algorithm detection
This commit refactors the algorithm detection logic to utilize a pattern registry. This allows for more flexible and configurable detection of cryptographic algorithms based on patterns defined in a TOML file.

The changes include:

- **AlgorithmDetector:**
  - Replaced hardcoded parameter patterns with a reference to `PatternRegistry`.
  - Modified `detect_algorithms` to use registry patterns for extraction.
  - Introduced `extract_algorithms_from_finding_with_registry` and `perform_deep_static_analysis_with_registry` for registry-based detection.
  - Added a fallback mechanism for when no registry is available.
  - Removed specific extraction methods for different libraries (e.g., `extract_rustcrypto_algorithms`).
  - Added helper methods for parsing primitives and creating algorithm assets from specifications.
- **CbomGenerator:**
  - Added a `with_registry` constructor to accept a `PatternRegistry`.
- **CLI:**
  - Updated `main.rs` to pass the loaded `PatternRegistry` to `CbomGenerator`.
- **Scanner Core:**
  - Added `AlgorithmSpec` and `ParameterPattern` structs to define algorithm patterns in TOML.
  - Added `CompiledAlgorithm` and `CompiledParameterPattern` for compiled regexes.
  - Modified `compile_library` to compile algorithm specifications.
- **Patterns.toml:**
  - Added new algorithm definitions for OpenSSL and RustCrypto, including symbol patterns, parameter extraction regexes, and NIST quantum security levels.

These changes decouple algorithm detection logic from the code, making it easier to add support for new libraries and algorithms by simply updating the `patterns.toml` file.

Co-authored-by: script3r <[email protected]>
1 parent f3d0078 commit e65fbd0

File tree

7 files changed

+479
-440
lines changed

7 files changed

+479
-440
lines changed

crates/cbom-generator/src/algorithm_detector.rs

Lines changed: 190 additions & 411 deletions
Large diffs are not rendered by default.

crates/cbom-generator/src/lib.rs

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,11 @@
66
77
use anyhow::{Context, Result};
88
use chrono::{DateTime, Utc};
9-
use scanner_core::Finding;
9+
use scanner_core::{Finding, PatternRegistry};
1010
use serde::{Deserialize, Serialize};
1111
use std::fs;
1212
use std::path::Path;
13+
use std::sync::Arc;
1314
use uuid::Uuid;
1415

1516
pub mod certificate_parser;
@@ -201,6 +202,15 @@ impl CbomGenerator {
201202
}
202203
}
203204

205+
pub fn with_registry(registry: Arc<PatternRegistry>) -> Self {
206+
Self {
207+
certificate_parser: CertificateParser::new(),
208+
dependency_analyzer: DependencyAnalyzer::new(),
209+
algorithm_detector: AlgorithmDetector::with_registry(registry),
210+
project_parser: ProjectParser::new(),
211+
}
212+
}
213+
204214
/// Generate an MV-CBOM for the given directory
205215
pub fn generate_cbom(&self, scan_path: &Path, findings: &[Finding]) -> Result<MvCbom> {
206216
let scan_path = scan_path.canonicalize()

crates/cli/src/main.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -202,7 +202,7 @@ fn main() -> Result<()> {
202202

203203
// Generate MV-CBOM if requested
204204
if args.cbom {
205-
let cbom_generator = CbomGenerator::new();
205+
let cbom_generator = CbomGenerator::with_registry(reg.clone());
206206

207207
// Use the first path as the scan root for CBOM generation
208208
let default_path = PathBuf::from(".");

crates/scanner-core/src/lib.rs

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,8 @@ pub struct LibrarySpec {
185185
pub languages: Vec<Language>,
186186
#[serde(default)]
187187
pub patterns: LibraryPatterns,
188+
#[serde(default)]
189+
pub algorithms: Vec<AlgorithmSpec>,
188190
}
189191

190192
#[derive(Debug, Clone, Default, Deserialize)]
@@ -199,6 +201,26 @@ pub struct LibraryPatterns {
199201
pub apis: Vec<String>,
200202
}
201203

204+
#[derive(Debug, Clone, Deserialize)]
205+
pub struct AlgorithmSpec {
206+
pub name: String,
207+
pub primitive: String, // "signature", "aead", "hash", "kem", "pke", "mac", "kdf", "prng"
208+
#[serde(default)]
209+
pub parameter_patterns: Vec<ParameterPattern>,
210+
#[serde(rename = "nistQuantumSecurityLevel")]
211+
pub nist_quantum_security_level: u8,
212+
#[serde(default)]
213+
pub symbol_patterns: Vec<String>, // Regex patterns to match this algorithm in findings
214+
}
215+
216+
#[derive(Debug, Clone, Deserialize)]
217+
pub struct ParameterPattern {
218+
pub name: String, // e.g., "keySize", "curve", "outputSize"
219+
pub pattern: String, // Regex pattern to extract the parameter value
220+
#[serde(default)]
221+
pub default_value: Option<serde_json::Value>, // Default value if not found
222+
}
223+
202224
#[derive(Deserialize)]
203225
pub struct Config {
204226
#[serde(default = "default_max_file_size")]
@@ -309,6 +331,23 @@ pub struct CompiledLibrary {
309331
pub namespace: Vec<Regex>,
310332
pub apis: Vec<Regex>,
311333
pub prefilter_substrings: Vec<String>,
334+
pub algorithms: Vec<CompiledAlgorithm>,
335+
}
336+
337+
#[derive(Debug, Clone)]
338+
pub struct CompiledAlgorithm {
339+
pub name: String,
340+
pub primitive: String,
341+
pub nist_quantum_security_level: u8,
342+
pub symbol_patterns: Vec<Regex>,
343+
pub parameter_patterns: Vec<CompiledParameterPattern>,
344+
}
345+
346+
#[derive(Debug, Clone)]
347+
pub struct CompiledParameterPattern {
348+
pub name: String,
349+
pub pattern: Regex,
350+
pub default_value: Option<serde_json::Value>,
312351
}
313352

314353
#[derive(Debug)]
@@ -372,6 +411,7 @@ fn compile_library(lib: LibrarySpec) -> Result<CompiledLibrary> {
372411
let namespace = compile_regexes(&lib.patterns.namespace)?;
373412
let apis = compile_regexes(&lib.patterns.apis)?;
374413
let prefilter_substrings = derive_prefilter_substrings(&lib.patterns);
414+
let algorithms = compile_algorithms(&lib.algorithms)?;
375415
Ok(CompiledLibrary {
376416
name: lib.name,
377417
languages: lib.languages.into_iter().collect(),
@@ -380,6 +420,7 @@ fn compile_library(lib: LibrarySpec) -> Result<CompiledLibrary> {
380420
namespace,
381421
apis,
382422
prefilter_substrings,
423+
algorithms,
383424
})
384425
}
385426

@@ -392,6 +433,33 @@ fn compile_regexes(srcs: &[String]) -> Result<Vec<Regex>> {
392433
.collect()
393434
}
394435

436+
fn compile_algorithms(algorithms: &[AlgorithmSpec]) -> Result<Vec<CompiledAlgorithm>> {
437+
algorithms.iter()
438+
.map(|algo| {
439+
let symbol_patterns = compile_regexes(&algo.symbol_patterns)?;
440+
let parameter_patterns = algo.parameter_patterns.iter()
441+
.map(|param| {
442+
let pattern = Regex::new(&param.pattern)
443+
.with_context(|| format!("bad parameter pattern: {}", param.pattern))?;
444+
Ok(CompiledParameterPattern {
445+
name: param.name.clone(),
446+
pattern,
447+
default_value: param.default_value.clone(),
448+
})
449+
})
450+
.collect::<Result<Vec<_>>>()?;
451+
452+
Ok(CompiledAlgorithm {
453+
name: algo.name.clone(),
454+
primitive: algo.primitive.clone(),
455+
nist_quantum_security_level: algo.nist_quantum_security_level,
456+
symbol_patterns,
457+
parameter_patterns,
458+
})
459+
})
460+
.collect()
461+
}
462+
395463
fn derive_prefilter_substrings(p: &LibraryPatterns) -> Vec<String> {
396464
let mut set = BTreeSet::new();
397465
let mut push_tokens = |s: &str| {

patterns.toml

Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,46 @@ apis = [
2323
"\\bPKCS\\d_[A-Za-z0-9_]+\\s*\\(",
2424
]
2525

26+
# Algorithm definitions for OpenSSL
27+
[[library.algorithms]]
28+
name = "RSA"
29+
primitive = "signature"
30+
nistQuantumSecurityLevel = 0
31+
symbol_patterns = [
32+
"\\bRSA_",
33+
"\\bEVP_PKEY_RSA",
34+
]
35+
[[library.algorithms.parameter_patterns]]
36+
name = "keySize"
37+
pattern = "RSA_(\\d+)"
38+
default_value = 2048
39+
40+
[[library.algorithms]]
41+
name = "ECDSA"
42+
primitive = "signature"
43+
nistQuantumSecurityLevel = 0
44+
symbol_patterns = [
45+
"\\bECDSA_",
46+
"\\bEC_KEY_",
47+
]
48+
[[library.algorithms.parameter_patterns]]
49+
name = "curve"
50+
pattern = ".*"
51+
default_value = "P-256"
52+
53+
[[library.algorithms]]
54+
name = "AES"
55+
primitive = "aead"
56+
nistQuantumSecurityLevel = 3
57+
symbol_patterns = [
58+
"\\bEVP_aes",
59+
"\\bAES_",
60+
]
61+
[[library.algorithms.parameter_patterns]]
62+
name = "keySize"
63+
pattern = "aes_(\\d+)"
64+
default_value = 256
65+
2666
[[library]]
2767
name = "libsodium"
2868
languages = ["C", "C++"]
@@ -231,6 +271,99 @@ apis = [
231271
"\\b(?:Sha256|Sha512|Digest)\\b",
232272
]
233273

274+
# Algorithm definitions for RustCrypto
275+
[[library.algorithms]]
276+
name = "RSA"
277+
primitive = "signature"
278+
nistQuantumSecurityLevel = 0
279+
symbol_patterns = [
280+
"\\b(?:rsa::|RsaPrivateKey|RsaPublicKey)",
281+
]
282+
[[library.algorithms.parameter_patterns]]
283+
name = "keySize"
284+
pattern = "(?:new|generate).*?(\\d{4})"
285+
default_value = 2048
286+
287+
[[library.algorithms]]
288+
name = "AES-GCM"
289+
primitive = "aead"
290+
nistQuantumSecurityLevel = 3
291+
symbol_patterns = [
292+
"\\baes_gcm::|Aes\\d+Gcm",
293+
]
294+
[[library.algorithms.parameter_patterns]]
295+
name = "keySize"
296+
pattern = "Aes(\\d+)Gcm"
297+
default_value = 256
298+
299+
[[library.algorithms]]
300+
name = "ChaCha20Poly1305"
301+
primitive = "aead"
302+
nistQuantumSecurityLevel = 3
303+
symbol_patterns = [
304+
"\\bchacha20poly1305::|ChaCha20Poly1305",
305+
]
306+
[[library.algorithms.parameter_patterns]]
307+
name = "keySize"
308+
pattern = ".*"
309+
default_value = 256
310+
311+
[[library.algorithms]]
312+
name = "SHA-256"
313+
primitive = "hash"
314+
nistQuantumSecurityLevel = 3
315+
symbol_patterns = [
316+
"\\bsha2::|Sha256",
317+
]
318+
[[library.algorithms.parameter_patterns]]
319+
name = "outputSize"
320+
pattern = "Sha(\\d+)"
321+
default_value = 256
322+
323+
[[library.algorithms]]
324+
name = "SHA-512"
325+
primitive = "hash"
326+
nistQuantumSecurityLevel = 3
327+
symbol_patterns = [
328+
"\\bsha2::|Sha512",
329+
]
330+
[[library.algorithms.parameter_patterns]]
331+
name = "outputSize"
332+
pattern = "Sha(\\d+)"
333+
default_value = 512
334+
335+
[[library.algorithms]]
336+
name = "BLAKE3"
337+
primitive = "hash"
338+
nistQuantumSecurityLevel = 3
339+
symbol_patterns = [
340+
"\\bblake3::|Blake3",
341+
]
342+
343+
[[library.algorithms]]
344+
name = "Ed25519"
345+
primitive = "signature"
346+
nistQuantumSecurityLevel = 0
347+
symbol_patterns = [
348+
"\\bed25519_dalek::|Ed25519",
349+
]
350+
[[library.algorithms.parameter_patterns]]
351+
name = "curve"
352+
pattern = ".*"
353+
default_value = "Curve25519"
354+
355+
[[library.algorithms]]
356+
name = "ECDSA"
357+
primitive = "signature"
358+
nistQuantumSecurityLevel = 0
359+
symbol_patterns = [
360+
"\\bp256::|p384::|k256::|Ecdsa",
361+
]
362+
[[library.algorithms.parameter_patterns]]
363+
name = "curve"
364+
pattern = "p(\\d+)"
365+
default_value = "P-256"
366+
234367
# =========================
235368
# Swift
236369
# =========================

test-cases/test-case-1-rsa-uses/mv-cbom.json

Lines changed: 36 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,15 @@
11
{
22
"bomFormat": "MV-CBOM",
33
"specVersion": "1.0",
4-
"serialNumber": "urn:uuid:1eb7780b-c620-4533-af49-b6ecde6b532d",
4+
"serialNumber": "urn:uuid:a2194b88-40ae-488e-bea2-c1d0feccab8a",
55
"version": 1,
66
"metadata": {
77
"component": {
88
"name": "test-rsa-uses",
99
"version": "0.1.0",
1010
"path": "/workspace/test-cases/test-case-1-rsa-uses"
1111
},
12-
"timestamp": "2025-09-15T17:20:55.563520627Z",
12+
"timestamp": "2025-09-15T17:30:58.500387978Z",
1313
"tools": [
1414
{
1515
"name": "cipherscope",
@@ -20,43 +20,56 @@
2020
},
2121
"cryptoAssets": [
2222
{
23-
"bom-ref": "9401744e-ca4e-45fe-9914-c46a7d739120",
23+
"bom-ref": "760f0d9d-01b2-494e-b660-7b73028b0a11",
2424
"assetType": "algorithm",
25-
"name": "AES-256-GCM",
25+
"name": "RSA",
26+
"assetProperties": {
27+
"primitive": "signature",
28+
"nistQuantumSecurityLevel": 0
29+
}
30+
},
31+
{
32+
"bom-ref": "fa69322d-3e2f-462d-a34c-9380713f2232",
33+
"assetType": "algorithm",
34+
"name": "RSA",
2635
"assetProperties": {
27-
"primitive": "aead",
28-
"parameterSet": {
29-
"keySize": 256,
30-
"mode": "GCM"
31-
},
32-
"nistQuantumSecurityLevel": 3
36+
"primitive": "signature",
37+
"nistQuantumSecurityLevel": 0
3338
}
3439
},
3540
{
36-
"bom-ref": "58f4da9a-a323-4fbf-887e-94689f8e67cf",
41+
"bom-ref": "80b57347-548b-4cce-848f-c7ba93174742",
42+
"assetType": "algorithm",
43+
"name": "RSA",
44+
"assetProperties": {
45+
"primitive": "signature",
46+
"nistQuantumSecurityLevel": 0
47+
}
48+
},
49+
{
50+
"bom-ref": "09715785-37bc-4c54-a62e-ba84eda8fe7a",
51+
"assetType": "algorithm",
52+
"name": "RSA",
53+
"assetProperties": {
54+
"primitive": "signature",
55+
"nistQuantumSecurityLevel": 0
56+
}
57+
},
58+
{
59+
"bom-ref": "930733f2-34ac-41a0-a24e-d27c57fc0df5",
3760
"assetType": "algorithm",
3861
"name": "RSA",
3962
"assetProperties": {
4063
"primitive": "signature",
41-
"parameterSet": {
42-
"keySize": 2048
43-
},
4464
"nistQuantumSecurityLevel": 0
4565
}
4666
}
4767
],
4868
"dependencies": [
4969
{
50-
"ref": "8184dc5d-d9ec-467c-8f47-42eb9d067718",
51-
"dependsOn": [
52-
"9401744e-ca4e-45fe-9914-c46a7d739120"
53-
],
54-
"dependencyType": "uses"
55-
},
56-
{
57-
"ref": "8184dc5d-d9ec-467c-8f47-42eb9d067718",
70+
"ref": "93cf08e0-223d-41f4-bb85-b88dc435f99c",
5871
"dependsOn": [
59-
"58f4da9a-a323-4fbf-887e-94689f8e67cf"
72+
"760f0d9d-01b2-494e-b660-7b73028b0a11"
6073
],
6174
"dependencyType": "implements"
6275
}

0 commit comments

Comments
 (0)