Skip to content

Commit a36fd2c

Browse files
committed
Crypto: Advanced literal filtering for OpenSSL, used for both unknown and known algorithm literals to improve dataflow performance.
1 parent 9637aec commit a36fd2c

File tree

4 files changed

+134
-35
lines changed

4 files changed

+134
-35
lines changed

cpp/ql/lib/experimental/quantum/Language.qll

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
private import cpp as Language
22
import semmle.code.cpp.dataflow.new.TaintTracking
33
import codeql.quantum.experimental.Model
4+
private import experimental.quantum.OpenSSL.AlgorithmCandidateLiteral
45

56
module CryptoInput implements InputSig<Language::Location> {
67
class DataFlowNode = DataFlow::Node;
@@ -89,13 +90,7 @@ module GenericDataSourceFlowConfig implements DataFlow::ConfigSig {
8990
module GenericDataSourceFlow = TaintTracking::Global<GenericDataSourceFlowConfig>;
9091

9192
private class ConstantDataSource extends Crypto::GenericConstantSourceInstance instanceof Literal {
92-
ConstantDataSource() {
93-
// TODO: this is an API specific workaround for OpenSSL, as 'EC' is a constant that may be used
94-
// where typical algorithms are specified, but EC specifically means set up a
95-
// default curve container, that will later be specified explicitly (or if not a default)
96-
// curve is used.
97-
this.getValue() != "EC"
98-
}
93+
ConstantDataSource() { this instanceof OpenSSLAlgorithmCandidateLiteral }
9994

10095
override DataFlow::Node getOutputNode() { result.asExpr() = this }
10196

Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
import cpp
2+
private import semmle.code.cpp.models.Models
3+
private import semmle.code.cpp.models.interfaces.FormattingFunction
4+
5+
/**
6+
* Holds if a StringLiteral could be an algorithm literal.
7+
* Note: this predicate should only consider restrictions with respect to strings only.
8+
* General restrictions are in the OpenSSLAlgorithmCandidateLiteral class.
9+
*/
10+
private predicate isOpenSSLStringLiteralAlgorithmCandidate(StringLiteral s) {
11+
// 'EC' is a constant that may be used where typical algorithms are specified,
12+
// but EC specifically means set up a default curve container, that will later be
13+
//specified explicitly (or if not a default) curve is used.
14+
s.getValue() != "EC" and
15+
// Ignore empty strings
16+
s.getValue() != "" and
17+
// ignore strings that represent integers, it is possible this could be used for actual
18+
// algorithms but assuming this is not the common case
19+
// NOTE: if we were to revert this restriction, we should still consider filtering "0"
20+
// to be consistent with filtering integer 0
21+
not exists(s.getValue().toInt()) and
22+
// Filter out strings with "%", to filter out format strings
23+
not s.getValue().matches("%\\%%") and
24+
// Filter out strings in brackets or braces
25+
not s.getValue().matches(["[%]", "(%)"]) and
26+
// Filter out all strings of length 1, since these are not algorithm names
27+
s.getValue().length() > 1 and
28+
// Ignore all strings that are in format string calls outputing to a stream (e.g., stdout)
29+
not exists(FormattingFunctionCall f |
30+
exists(f.getOutputArgument(true)) and s = f.(Call).getAnArgument()
31+
) and
32+
// Ignore all format string calls where there is no known out param (resulting string)
33+
// i.e., ignore printf, since it will just ouput a string and not produce a new string
34+
not exists(FormattingFunctionCall f |
35+
// Note: using two ways of determining if there is an out param, since I'm not sure
36+
// which way is canonical
37+
not exists(f.getOutputArgument(false)) and
38+
not f.getTarget().(FormattingFunction).hasTaintFlow(_, _) and
39+
f.(Call).getAnArgument() = s
40+
)
41+
}
42+
43+
/**
44+
* Holds if an IntLiteral could be an algorithm literal.
45+
* Note: this predicate should only consider restrictions with respect to integers only.
46+
* General restrictions are in the OpenSSLAlgorithmCandidateLiteral class.
47+
*/
48+
private predicate isOpenSSLIntLiteralAlgorithmCandidate(Literal l) {
49+
exists(l.getValue().toInt()) and
50+
// Ignore char literals
51+
not l instanceof CharLiteral and
52+
not l instanceof StringLiteral and
53+
// Ignore integer values of 0, commonly referring to NULL only (no known algorithm 0)
54+
// Also ignore integer values greater than 5000
55+
l.getValue().toInt() != 0 and
56+
// ASSUMPTION, no negative numbers are allowed
57+
// RATIONALE: this is a performance improvement to avoid having to trace every number
58+
not exists(UnaryMinusExpr u | u.getOperand() = l) and
59+
// OPENSSL has a special macro for getting every line, ignore it
60+
not exists(MacroInvocation mi | mi.getExpr() = l and mi.getMacroName() = "OPENSSL_LINE") and
61+
// Filter out cases where an int is returned into a pointer, e.g., return NULL;
62+
not exists(ReturnStmt r |
63+
r.getExpr() = l and
64+
r.getEnclosingFunction().getType().getUnspecifiedType() instanceof DerivedType
65+
) and
66+
// A literal as an array index should never be an algorithm
67+
not exists(ArrayExpr op | op.getArrayOffset() = l) and
68+
// A literal used in a bitwise operation may be an algorithm, but not a candidate
69+
// for the purposes of finding applied algorithms
70+
not exists(BinaryBitwiseOperation op | op.getAnOperand() = l) and
71+
not exists(AssignBitwiseOperation op | op.getAnOperand() = l) and
72+
//Filter out cases where an int is assigned or initialized into a pointer, e.g., char* x = NULL;
73+
not exists(Assignment a |
74+
a.getRValue() = l and
75+
a.getLValue().getType().getUnspecifiedType() instanceof DerivedType
76+
) and
77+
not exists(Initializer i |
78+
i.getExpr() = l and
79+
i.getDeclaration().getADeclarationEntry().getUnspecifiedType() instanceof DerivedType
80+
) and
81+
// Filter out cases where the literal is used in any kind of arithmetic operation
82+
not exists(BinaryArithmeticOperation op | op.getAnOperand() = l) and
83+
not exists(UnaryArithmeticOperation op | op.getOperand() = l) and
84+
not exists(AssignArithmeticOperation op | op.getAnOperand() = l)
85+
}
86+
87+
/**
88+
* Any literal that may represent an algorithm for use in an operation, even if an invalid or unknown algorithm.
89+
* The set of all literals is restricted by this class to cases where there is higher
90+
* plausibility that the literal is eventually used as an algorithm.
91+
* Literals are filtered, for example if they are used in a way no indicative of an algorithm use
92+
* such as in an array index, bitwise operation, or logical operation.
93+
* Note a case like this:
94+
* if(algVal == "AES")
95+
*
96+
* "AES" may be a legitimate algorithm literal, but the literal will not be used for an operation directly
97+
* since it is in a equality comparison, hence this case would also be filtered.
98+
*/
99+
class OpenSSLAlgorithmCandidateLiteral extends Literal {
100+
OpenSSLAlgorithmCandidateLiteral() {
101+
(
102+
isOpenSSLIntLiteralAlgorithmCandidate(this) or
103+
isOpenSSLStringLiteralAlgorithmCandidate(this)
104+
) and
105+
// ********* General filters beyond what is filtered for strings and ints *********
106+
// An algorithm literal in a switch case will not be directly applied to an operation.
107+
not exists(SwitchCase sc | sc.getExpr() = this) and
108+
// A literal in a logical operation may be an algorithm, but not a candidate
109+
// for the purposes of finding applied algorithms
110+
not exists(BinaryLogicalOperation op | op.getAnOperand() = this) and
111+
not exists(UnaryLogicalOperation op | op.getOperand() = this) and
112+
// A literal in a comparison operation may be an algorithm, but not a candidate
113+
// for the purposes of finding applied algorithms
114+
not exists(ComparisonOperation op | op.getAnOperand() = this)
115+
}
116+
}

cpp/ql/lib/experimental/quantum/OpenSSL/AlgorithmInstances/KnownAlgorithmConstants.qll

Lines changed: 11 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import cpp
2-
private import experimental.quantum.OpenSSL.LibraryDetector
2+
import experimental.quantum.OpenSSL.AlgorithmCandidateLiteral
33

44
predicate resolveAlgorithmFromExpr(Expr e, string normalizedName, string algType) {
55
resolveAlgorithmFromCall(e, normalizedName, algType)
@@ -89,7 +89,6 @@ class KnownOpenSSLEllipticCurveAlgorithmConstant extends KnownOpenSSLAlgorithmCo
8989
* alias = "dss1" and target = "dsaWithSHA1"
9090
*/
9191
predicate resolveAlgorithmFromCall(Call c, string normalized, string algType) {
92-
isPossibleOpenSSLFunction(c.getTarget()) and
9392
exists(string name, string parsedTargetName |
9493
parsedTargetName =
9594
c.getTarget().getName().replaceAll("EVP_", "").toLowerCase().replaceAll("_", "-") and
@@ -103,7 +102,9 @@ predicate resolveAlgorithmFromCall(Call c, string normalized, string algType) {
103102
* if `e` resolves to a known algorithm.
104103
* If this predicate does not hold, then `e` can be interpreted as being of `UNKNOWN` type.
105104
*/
106-
predicate resolveAlgorithmFromLiteral(Literal e, string normalized, string algType) {
105+
predicate resolveAlgorithmFromLiteral(
106+
OpenSSLAlgorithmCandidateLiteral e, string normalized, string algType
107+
) {
107108
exists(int nid |
108109
nid = getPossibleNidFromLiteral(e) and knownOpenSSLAlgorithmLiteral(_, nid, normalized, algType)
109110
)
@@ -125,28 +126,15 @@ string resolveAlgorithmAlias(string name) {
125126
)
126127
}
127128

128-
private int getPossibleNidFromLiteral(Literal e) {
129+
/**
130+
* Determines if an int literal (NID) is a candidate for being an algorithm literal.
131+
* Checks for common cases where literals are used that would not be indicative of an algorithm.
132+
* Returns the int literal value if the literal is a candidate for an algorithm.
133+
*/
134+
private int getPossibleNidFromLiteral(OpenSSLAlgorithmCandidateLiteral e) {
129135
result = e.getValue().toInt() and
130136
not e instanceof CharLiteral and
131-
not e instanceof StringLiteral and
132-
// ASSUMPTION, no negative numbers are allowed
133-
// RATIONALE: this is a performance improvement to avoid having to trace every number
134-
not exists(UnaryMinusExpr u | u.getOperand() = e) and
135-
// OPENSSL has a special macro for getting every line, ignore it
136-
not exists(MacroInvocation mi | mi.getExpr() = e and mi.getMacroName() = "OPENSSL_LINE") and
137-
// Filter out cases where an int is assigned into a pointer, e.g., char* x = NULL;
138-
not exists(Assignment a |
139-
a.getRValue() = e and a.getLValue().getType().getUnspecifiedType() instanceof PointerType
140-
) and
141-
not exists(Initializer i |
142-
i.getExpr() = e and
143-
i.getDeclaration().getADeclarationEntry().getUnspecifiedType() instanceof PointerType
144-
) and
145-
// Filter out cases where an int is returned into a pointer, e.g., return NULL;
146-
not exists(ReturnStmt r |
147-
r.getExpr() = e and
148-
r.getEnclosingFunction().getType().getUnspecifiedType() instanceof PointerType
149-
)
137+
not e instanceof StringLiteral
150138
}
151139

152140
string getAlgorithmAlias(string alias) {

cpp/ql/lib/experimental/quantum/OpenSSL/OpenSSL.qll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,9 @@ import cpp
22
import semmle.code.cpp.dataflow.new.DataFlow
33

44
module OpenSSLModel {
5-
import experimental.quantum.Language
6-
import experimental.quantum.OpenSSL.AlgorithmInstances.OpenSSLAlgorithmInstances
7-
import experimental.quantum.OpenSSL.AlgorithmValueConsumers.OpenSSLAlgorithmValueConsumers
8-
import experimental.quantum.OpenSSL.Operations.OpenSSLOperations
9-
import experimental.quantum.OpenSSL.Random
5+
import AlgorithmInstances.OpenSSLAlgorithmInstances
6+
import AlgorithmValueConsumers.OpenSSLAlgorithmValueConsumers
7+
import Operations.OpenSSLOperations
8+
import Random
9+
import AlgorithmCandidateLiteral
1010
}

0 commit comments

Comments
 (0)