Skip to content

Commit 17afbdf

Browse files
authored
Merge pull request github#5635 from RasmusWL/port-weak-crypto-algorithm
Approved by yoff
2 parents c4f604b + 753dca9 commit 17afbdf

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

55 files changed

+1668
-291
lines changed

config/identical-files.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -439,7 +439,7 @@
439439
],
440440
"CryptoAlgorithms Python/JS": [
441441
"javascript/ql/src/semmle/javascript/security/CryptoAlgorithms.qll",
442-
"python/ql/src/semmle/crypto/Crypto.qll"
442+
"python/ql/src/semmle/python/concepts/CryptoAlgorithms.qll"
443443
],
444444
"SensitiveDataHeuristics Python/JS": [
445445
"javascript/ql/src/semmle/javascript/security/internal/SensitiveDataHeuristics.qll",
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
lgtm,codescanning
2+
* Updated the _Use of a broken or weak cryptographic algorithm_ (`py/weak-cryptographic-algorithm`) query, so it alerts on any use of a weak cryptographic non-hashing algorithm. Introduced a new query _Use of a broken or weak cryptographic hashing algorithm on sensitive data_ (`py/weak-sensitive-data-hashing`) to handle weak cryptographic hashing algorithms, which only alerts when used on sensitive data.

python/ql/src/Security/CWE-327/BrokenCryptoAlgorithm.qhelp

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,22 +15,28 @@
1515
secure than it appears to be.
1616
</p>
1717

18+
<p>
19+
This query alerts on any use of a weak cryptographic algorithm, that is
20+
not a hashing algorithm. Use of broken or weak cryptographic hash
21+
functions are handled by the
22+
<code>py/weak-sensitive-data-hashing</code> query.
23+
</p>
24+
1825
</overview>
1926
<recommendation>
2027

2128
<p>
2229
Ensure that you use a strong, modern cryptographic
23-
algorithm. Use at least AES-128 or RSA-2048 for
24-
encryption, and SHA-2 or SHA-3 for secure hashing.
30+
algorithm, such as AES-128 or RSA-2048.
2531
</p>
2632

2733
</recommendation>
2834
<example>
2935

3036
<p>
31-
The following code uses the <code>pycrypto</code>
37+
The following code uses the <code>pycryptodome</code>
3238
library to encrypt some secret data. When you create a cipher using
33-
<code>pycrypto</code> you must specify the encryption
39+
<code>pycryptodome</code> you must specify the encryption
3440
algorithm to use. The first example uses DES, which is an
3541
older algorithm that is now considered weak. The second
3642
example uses AES, which is a stronger modern algorithm.
@@ -39,8 +45,12 @@
3945
<sample src="examples/broken_crypto.py" />
4046

4147
<p>
42-
WARNING: Although the second example above is more robust,
43-
pycrypto is no longer actively maintained so we recommend using <code>cryptography</code> instead.
48+
NOTICE: the original
49+
<code><a href="https://pypi.org/project/pycrypto/">pycrypto</a></code>
50+
PyPI package that provided the <code>Crypto</code> module is not longer
51+
actively maintained, so you should use the
52+
<code><a href="https://pypi.org/project/pycryptodome/">pycryptodome</a></code>
53+
PyPI package instead (which has a compatible API).
4454
</p>
4555

4656
</example>
Lines changed: 12 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
/**
22
* @name Use of a broken or weak cryptographic algorithm
33
* @description Using broken or weak cryptographic algorithms can compromise security.
4-
* @kind path-problem
4+
* @kind problem
55
* @problem.severity warning
66
* @precision high
77
* @id py/weak-cryptographic-algorithm
@@ -10,21 +10,15 @@
1010
*/
1111

1212
import python
13-
import semmle.python.security.Paths
14-
import semmle.python.security.SensitiveData
15-
import semmle.python.security.Crypto
13+
import semmle.python.Concepts
1614

17-
class BrokenCryptoConfiguration extends TaintTracking::Configuration {
18-
BrokenCryptoConfiguration() { this = "Broken crypto configuration" }
19-
20-
override predicate isSource(TaintTracking::Source source) {
21-
source instanceof SensitiveDataSource
22-
}
23-
24-
override predicate isSink(TaintTracking::Sink sink) { sink instanceof WeakCryptoSink }
25-
}
26-
27-
from BrokenCryptoConfiguration config, TaintedPathSource src, TaintedPathSink sink
28-
where config.hasFlowPath(src, sink)
29-
select sink.getSink(), src, sink, "$@ is used in a broken or weak cryptographic algorithm.",
30-
src.getSource(), "Sensitive data"
15+
from Cryptography::CryptographicOperation operation, Cryptography::CryptographicAlgorithm algorithm
16+
where
17+
algorithm = operation.getAlgorithm() and
18+
algorithm.isWeak() and
19+
// `Cryptography::HashingAlgorithm` and `Cryptography::PasswordHashingAlgorithm` are
20+
// handled by `py/weak-sensitive-data-hashing`
21+
algorithm instanceof Cryptography::EncryptionAlgorithm
22+
select operation,
23+
"The cryptographic algorithm " + algorithm.getName() +
24+
" is broken or weak, and should not be used."
Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
<!DOCTYPE qhelp PUBLIC
2+
"-//Semmle//qhelp//EN"
3+
"qhelp.dtd">
4+
<qhelp>
5+
<overview>
6+
<p>
7+
Using a broken or weak cryptographic hash function can leave data
8+
vulnerable, and should not be used in security related code.
9+
</p>
10+
11+
<p>
12+
A strong cryptographic hash function should be resistant to:
13+
</p>
14+
<ul>
15+
<li>
16+
pre-image attacks: if you know a hash value <code>h(x)</code>,
17+
you should not be able to easily find the input <code>x</code>.
18+
</li>
19+
<li>
20+
collision attacks: if you know a hash value <code>h(x)</code>,
21+
you should not be able to easily find a different input <code>y</code>
22+
with the same hash value <code>h(x) = h(y)</code>.
23+
</li>
24+
</ul>
25+
<p>
26+
In cases with a limited input space, such as for passwords, the hash
27+
function also needs to be computationally expensive to be resistant to
28+
brute-force attacks. Passwords should also have an unique salt applied
29+
before hashing, but that is not considered by this query.
30+
</p>
31+
32+
<p>
33+
As an example, both MD5 and SHA-1 are known to be vulnerable to collision attacks.
34+
</p>
35+
36+
<p>
37+
Since it's OK to use a weak cryptographic hash function in a non-security
38+
context, this query only alerts when these are used to hash sensitive
39+
data (such as passwords, certificates, usernames).
40+
</p>
41+
42+
<p>
43+
Use of broken or weak cryptographic algorithms that are not hashing algorithms, is
44+
handled by the <code>py/weak-cryptographic-algorithm</code> query.
45+
</p>
46+
47+
</overview>
48+
<recommendation>
49+
50+
<p>
51+
Ensure that you use a strong, modern cryptographic hash function:
52+
</p>
53+
54+
<ul>
55+
<li>
56+
such as Argon2, scrypt, bcrypt, or PBKDF2 for passwords and other data with limited input space.
57+
</li>
58+
<li>
59+
such as SHA-2, or SHA-3 in other cases.
60+
</li>
61+
</ul>
62+
63+
</recommendation>
64+
<example>
65+
66+
<p>
67+
The following example shows two functions for checking whether the hash
68+
of a certificate matches a known value -- to prevent tampering.
69+
70+
The first function uses MD5 that is known to be vulnerable to collision attacks.
71+
72+
The second function uses SHA-256 that is a strong cryptographic hashing function.
73+
</p>
74+
75+
<sample src="examples/weak_certificate_hashing.py" />
76+
77+
</example>
78+
<example>
79+
<p>
80+
The following example shows two functions for hashing passwords.
81+
82+
The first function uses SHA-256 to hash passwords. Although SHA-256 is a
83+
strong cryptographic hash function, it is not suitable for password
84+
hashing since it is not computationally expensive.
85+
</p>
86+
87+
<sample src="examples/weak_password_hashing_bad.py" />
88+
89+
90+
<p>
91+
The second function uses Argon2 (through the <code>argon2-cffi</code>
92+
PyPI package), which is a strong password hashing algorithm (and
93+
includes a per-password salt by default).
94+
</p>
95+
96+
<sample src="examples/weak_password_hashing_good.py" />
97+
98+
</example>
99+
100+
<references>
101+
<li>OWASP: <a href="https://cheatsheetseries.owasp.org/cheatsheets/Password_Storage_Cheat_Sheet.html">Password Storage Cheat Sheet</a></li>
102+
</references>
103+
104+
</qhelp>
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
/**
2+
* @name Use of a broken or weak cryptographic hashing algorithm on sensitive data
3+
* @description Using broken or weak cryptographic hashing algorithms can compromise security.
4+
* @kind path-problem
5+
* @problem.severity warning
6+
* @precision high
7+
* @id py/weak-sensitive-data-hashing
8+
* @tags security
9+
* external/cwe/cwe-327
10+
* external/cwe/cwe-916
11+
*/
12+
13+
import python
14+
import semmle.python.security.dataflow.WeakSensitiveDataHashing
15+
import semmle.python.dataflow.new.DataFlow
16+
import semmle.python.dataflow.new.TaintTracking
17+
import DataFlow::PathGraph
18+
19+
from
20+
DataFlow::PathNode source, DataFlow::PathNode sink, string ending, string algorithmName,
21+
string classification
22+
where
23+
exists(NormalHashFunction::Configuration config |
24+
config.hasFlowPath(source, sink) and
25+
algorithmName = sink.getNode().(NormalHashFunction::Sink).getAlgorithmName() and
26+
classification = source.getNode().(NormalHashFunction::Source).getClassification() and
27+
ending = "."
28+
)
29+
or
30+
exists(ComputationallyExpensiveHashFunction::Configuration config |
31+
config.hasFlowPath(source, sink) and
32+
algorithmName = sink.getNode().(ComputationallyExpensiveHashFunction::Sink).getAlgorithmName() and
33+
classification =
34+
source.getNode().(ComputationallyExpensiveHashFunction::Source).getClassification() and
35+
(
36+
sink.getNode().(ComputationallyExpensiveHashFunction::Sink).isComputationallyExpensive() and
37+
ending = "."
38+
or
39+
not sink.getNode().(ComputationallyExpensiveHashFunction::Sink).isComputationallyExpensive() and
40+
ending =
41+
" for " + classification +
42+
" hashing, since it is not a computationally expensive hash function."
43+
)
44+
)
45+
select sink.getNode(), source, sink,
46+
"$@ is used in a hashing algorithm (" + algorithmName + ") that is insecure" + ending,
47+
source.getNode(), "Sensitive data (" + classification + ")"
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
import hashlib
2+
3+
def certificate_matches_known_hash_bad(certificate, known_hash):
4+
hash = hashlib.md5(certificate).hexdigest() # BAD
5+
return hash == known_hash
6+
7+
def certificate_matches_known_hash_good(certificate, known_hash):
8+
hash = hashlib.sha256(certificate).hexdigest() # GOOD
9+
return hash == known_hash
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
import hashlib
2+
3+
def get_password_hash(password: str, salt: str):
4+
return hashlib.sha256(password + salt).hexdigest() # BAD
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
from argon2 import PasswordHasher
2+
3+
def get_initial_hash(password: str):
4+
ph = PasswordHasher()
5+
return ph.hash(password) # GOOD
6+
7+
def check_password(password: str, known_hash):
8+
ph = PasswordHasher()
9+
return ph.verify(known_hash, password) # GOOD
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
/**
2+
* @name OLD QUERY: Use of a broken or weak cryptographic algorithm
3+
* @description Using broken or weak cryptographic algorithms can compromise security.
4+
* @kind path-problem
5+
* @problem.severity warning
6+
* @id py/old/weak-cryptographic-algorithm
7+
* @deprecated
8+
*/
9+
10+
import python
11+
import semmle.python.security.Paths
12+
import semmle.python.security.SensitiveData
13+
import semmle.python.security.Crypto
14+
15+
class BrokenCryptoConfiguration extends TaintTracking::Configuration {
16+
BrokenCryptoConfiguration() { this = "Broken crypto configuration" }
17+
18+
override predicate isSource(TaintTracking::Source source) {
19+
source instanceof SensitiveDataSource
20+
}
21+
22+
override predicate isSink(TaintTracking::Sink sink) { sink instanceof WeakCryptoSink }
23+
}
24+
25+
from BrokenCryptoConfiguration config, TaintedPathSource src, TaintedPathSink sink
26+
where config.hasFlowPath(src, sink)
27+
select sink.getSink(), src, sink, "$@ is used in a broken or weak cryptographic algorithm.",
28+
src.getSource(), "Sensitive data"

0 commit comments

Comments
 (0)