Skip to content

Commit 4d44755

Browse files
committed
Refactor Model and CBOM print queries
1 parent 1a12fb3 commit 4d44755

File tree

5 files changed

+134
-84
lines changed

5 files changed

+134
-84
lines changed

cpp/ql/src/experimental/Quantum/CBOMGraph.ql

Lines changed: 0 additions & 48 deletions
This file was deleted.
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
/**
2+
* @name Print CBOM Graph
3+
* @description Outputs a graph representation of the cryptographic bill of materials.
4+
* This query only supports DGML output, as CodeQL DOT output omits properties.
5+
* @kind graph
6+
* @id cpp/print-cbom-graph
7+
*/
8+
9+
import experimental.Quantum.Language
10+
11+
query predicate nodes(Crypto::NodeBase node, string key, string value) {
12+
Crypto::nodes_graph_impl(node, key, value)
13+
}
14+
15+
query predicate edges(Crypto::NodeBase source, Crypto::NodeBase target, string key, string value) {
16+
Crypto::edges_graph_impl(source, target, key, value)
17+
}
18+
19+
query predicate graphProperties(string key, string value) {
20+
key = "semmle.graphKind" and value = "graph"
21+
}

java/ql/lib/experimental/Quantum/JCA.qll

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,14 @@ module JCAModel {
77
abstract class EncryptionOperation extends Crypto::EncryptionOperation { }
88

99
// TODO: "PBEWith" can have suffixes. How should these be handled: enumerate them, or match a pattern?
10+
bindingset[algo]
1011
predicate cipher_names(string algo) {
11-
algo =
12-
[
13-
"AES", "AESWrap", "AESWrapPad", "ARCFOUR", "Blowfish", "ChaCha20", "ChaCha20-Poly1305",
14-
"DES", "DESede", "DESedeWrap", "ECIES", "PBEWith", "RC2", "RC4", "RC5", "RSA"
15-
]
12+
// "Standard names are not case-sensitive."
13+
algo.toUpperCase()
14+
.matches([
15+
"AES", "AESWrap", "AESWrapPad", "ARCFOUR", "Blowfish", "ChaCha20", "ChaCha20-Poly1305",
16+
"DES", "DESede", "DESedeWrap", "ECIES", "PBEWith%", "RC2", "RC4", "RC5", "RSA"
17+
].toUpperCase())
1618
}
1719

1820
// TODO: handle the fact that x is an int that can take various values. Same question as above: enumerate?
@@ -33,9 +35,10 @@ module JCAModel {
3335
]
3436
}
3537

36-
////cipher specifics ----------------------------------------
37-
class CipherInstance extends Call {
38-
CipherInstance() { this.getCallee().hasQualifiedName("javax.crypto", "Cipher", "getInstance") }
38+
class CipherGetInstanceCall extends Call {
39+
CipherGetInstanceCall() {
40+
this.getCallee().hasQualifiedName("javax.crypto", "Cipher", "getInstance")
41+
}
3942

4043
Expr getAlgorithmArg() { result = this.getArgument(0) }
4144
}
@@ -65,7 +68,7 @@ module JCAModel {
6568
}
6669

6770
override Crypto::TModeOperation getModeType() {
68-
modeToNameMapping(result, this.getRawAlgorithmName())
71+
this.modeToNameMapping(result, this.getRawAlgorithmName())
6972
}
7073

7174
override Crypto::LocatableElement getOrigin(string name) {
@@ -91,7 +94,7 @@ module JCAModel {
9194
predicate isSource(DataFlow::Node src) { src.asExpr() instanceof CipherAlgorithmStringLiteral }
9295

9396
predicate isSink(DataFlow::Node sink) {
94-
exists(CipherInstance call | sink.asExpr() = call.getAlgorithmArg())
97+
exists(CipherGetInstanceCall call | sink.asExpr() = call.getAlgorithmArg())
9598
}
9699
}
97100

@@ -100,7 +103,7 @@ module JCAModel {
100103
predicate algorithmStringToCipherInstanceArgFlow(
101104
string name, CipherAlgorithmStringLiteral origin, Expr arg
102105
) {
103-
exists(CipherInstance sinkCall |
106+
exists(CipherGetInstanceCall sinkCall |
104107
origin.getValue().splitAt("/") = name and
105108
arg = sinkCall and
106109
AlgorithmStringToFetchFlow::flow(DataFlow::exprNode(origin),
@@ -111,7 +114,7 @@ module JCAModel {
111114
predicate modeStringToCipherInstanceArgFlow(
112115
string name, ModeOfOperationStringLiteral mode, Expr arg
113116
) {
114-
exists(CipherInstance sinkCall |
117+
exists(CipherGetInstanceCall sinkCall |
115118
//consider if this should be a more specific predicate
116119
mode.getRawAlgorithmName() = name and
117120
arg = sinkCall and
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
/**
2+
* @name Print CBOM Graph
3+
* @description Outputs a graph representation of the cryptographic bill of materials.
4+
* This query only supports DGML output, as CodeQL DOT output omits properties.
5+
* @kind graph
6+
* @id java/print-cbom-graph
7+
*/
8+
9+
import experimental.Quantum.Language
10+
11+
query predicate nodes(Crypto::NodeBase node, string key, string value) {
12+
Crypto::nodes_graph_impl(node, key, value)
13+
}
14+
15+
query predicate edges(Crypto::NodeBase source, Crypto::NodeBase target, string key, string value) {
16+
Crypto::edges_graph_impl(source, target, key, value)
17+
}
18+
19+
query predicate graphProperties(string key, string value) {
20+
key = "semmle.graphKind" and value = "graph"
21+
}

shared/cryptography/codeql/cryptography/Model.qll

Lines changed: 77 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,45 @@ module CryptographyBase<LocationSig Location, InputSig<Location> Input> {
2222
UnknownPropertyValue() { this = "<unknown>" }
2323
}
2424

25+
private string getPropertyAsGraphString(NodeBase node, string key) {
26+
result =
27+
strictconcat(any(string value, Location location, string parsed |
28+
node.properties(key, value, location) and
29+
parsed = "(" + value + "," + location.toString() + ")"
30+
|
31+
parsed
32+
), ","
33+
)
34+
}
35+
36+
predicate nodes_graph_impl(NodeBase node, string key, string value) {
37+
key = "semmle.label" and
38+
value = node.toString()
39+
or
40+
// CodeQL's DGML output does not include a location
41+
key = "Location" and
42+
value = node.getLocation().toString()
43+
or
44+
// Known unknown edges should be reported as properties rather than edges
45+
node = node.getChild(key) and
46+
value = "<unknown>"
47+
or
48+
// Report properties
49+
value = getPropertyAsGraphString(node, key)
50+
}
51+
52+
predicate edges_graph_impl(NodeBase source, NodeBase target, string key, string value) {
53+
key = "semmle.label" and
54+
target = source.getChild(value) and
55+
// Known unknowns are reported as properties rather than edges
56+
not source = target
57+
}
58+
59+
/**
60+
* The base class for all cryptographic assets, such as operations and algorithms.
61+
*
62+
* Each `NodeBase` is a node in a graph of cryptographic operations, where the edges are the relationships between the nodes.
63+
*/
2564
abstract class NodeBase instanceof LocatableElement {
2665
/**
2766
* Returns a string representation of this node, usually the name of the operation/algorithm/property.
@@ -104,6 +143,7 @@ module CryptographyBase<LocationSig Location, InputSig<Location> Input> {
104143

105144
/**
106145
* A hashing operation that processes data to generate a hash value.
146+
*
107147
* This operation takes an input message of arbitrary content and length and produces a fixed-size
108148
* hash value as the output using a specified hashing algorithm.
109149
*/
@@ -113,19 +153,7 @@ module CryptographyBase<LocationSig Location, InputSig<Location> Input> {
113153
override string getOperationName() { result = "HASH" }
114154
}
115155

116-
// Rule: no newtype representing a type of algorithm should be modelled with multiple interfaces
117-
//
118-
// Example: HKDF and PKCS12KDF are both key derivation algorithms.
119-
// However, PKCS12KDF also has a property: the iteration count.
120-
//
121-
// If we have HKDF and PKCS12KDF under TKeyDerivationType,
122-
// someone modelling a library might try to make a generic identification of both of those algorithms.
123-
//
124-
// They will therefore not use the specialized type for PKCS12KDF,
125-
// meaning "from PKCS12KDF algo select algo" will have no results.
126-
//
127156
newtype THashType =
128-
// We're saying by this that all of these have an identical interface / properties / edges
129157
MD5() or
130158
SHA1() or
131159
SHA256() or
@@ -197,8 +225,28 @@ module CryptographyBase<LocationSig Location, InputSig<Location> Input> {
197225
}
198226
}
199227

200-
newtype TEllipticCurveFamilyType =
201-
// We're saying by this that all of these have an identical interface / properties / edges
228+
/*
229+
* TODO:
230+
*
231+
* Rule: No newtype representing a type of algorithm should be modelled with multiple interfaces
232+
*
233+
* Example 1: HKDF and PKCS12KDF are both key derivation algorithms.
234+
* However, PKCS12KDF also has a property: the iteration count.
235+
*
236+
* If we have HKDF and PKCS12KDF under TKeyDerivationType,
237+
* someone modelling a library might try to make a generic identification of both of those algorithms.
238+
*
239+
* They will therefore not use the specialized type for PKCS12KDF,
240+
* meaning "from PKCS12KDF algo select algo" will have no results.
241+
*
242+
* Example 2: Each type below represents a common family of elliptic curves, with a shared interface, i.e.,
243+
* predicates for library modellers to implement as well as the properties and edges reported.
244+
*/
245+
246+
/**
247+
* Elliptic curve families
248+
*/
249+
newtype TEllipticCurveFamily =
202250
NIST() or
203251
SEC() or
204252
NUMS() or
@@ -211,13 +259,10 @@ module CryptographyBase<LocationSig Location, InputSig<Location> Input> {
211259
ES() or
212260
OtherEllipticCurveFamilyType()
213261

214-
/**
215-
* Elliptic curve algorithm
216-
*/
217262
abstract class EllipticCurve extends Algorithm {
218263
abstract string getKeySize(Location location);
219264

220-
abstract TEllipticCurveFamilyType getCurveFamilyType();
265+
abstract TEllipticCurveFamily getCurveFamilyType();
221266

222267
override predicate properties(string key, string value, Location location) {
223268
super.properties(key, value, location)
@@ -236,8 +281,10 @@ module CryptographyBase<LocationSig Location, InputSig<Location> Input> {
236281
/**
237282
* For elliptic curves specifically, users are responsible for providing
238283
* the official name of the algorithm as the 'raw' name.
284+
*
239285
* Casing doesn't matter, we will enforce further naming restrictions on
240286
* `getAlgorithmName` by default.
287+
*
241288
* Rationale: elliptic curve names can have a lot of variation in their components
242289
* (e.g., "secp256r1" vs "P-256"). Producing a generalized set of properties
243290
* that captures all cases is possible, but such modeling is likely not necessary.
@@ -256,17 +303,20 @@ module CryptographyBase<LocationSig Location, InputSig<Location> Input> {
256303
override string getOperationName() { result = "ENCRYPTION" }
257304
}
258305

306+
/**
307+
* Block cipher modes of operation algorithms
308+
*/
259309
newtype TModeOperation =
260310
ECB() or
261311
CBC() or
262312
OtherMode()
263313

264314
abstract class ModeOfOperation extends Algorithm {
265-
string getValue() { result = "" }
266-
267315
final private predicate modeToNameMapping(TModeOperation type, string name) {
268316
type instanceof ECB and name = "ECB"
269317
or
318+
type instanceof CBC and name = "CBC"
319+
or
270320
type instanceof OtherMode and name = this.getRawAlgorithmName()
271321
}
272322

@@ -275,17 +325,20 @@ module CryptographyBase<LocationSig Location, InputSig<Location> Input> {
275325
override string getAlgorithmName() { this.modeToNameMapping(this.getModeType(), result) }
276326
}
277327

328+
/**
329+
* A helper type for distinguishing between block and stream ciphers.
330+
*/
278331
newtype TCipherStructure =
279332
Block() or
280333
Stream()
281334

282-
newtype TSymmetricCipherFamilyType =
283-
// We're saying by this that all of these have an identical interface / properties / edges
284-
AES()
285-
286335
/**
287336
* Symmetric algorithms
288337
*/
338+
newtype TSymmetricCipherFamilyType =
339+
AES() or
340+
OtherSymmetricCipherFamilyType()
341+
289342
abstract class SymmetricAlgorithm extends Algorithm {
290343
abstract TSymmetricCipherFamilyType getSymmetricCipherFamilyType();
291344

0 commit comments

Comments
 (0)