Skip to content

Commit 1616975

Browse files
committed
Python: Model hashlib from standard library
1 parent 7ffbfa8 commit 1616975

File tree

2 files changed

+135
-8
lines changed
  • python/ql

2 files changed

+135
-8
lines changed

python/ql/src/semmle/python/frameworks/Stdlib.qll

Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -866,6 +866,133 @@ private module Stdlib {
866866
}
867867
}
868868

869+
// ---------------------------------------------------------------------------
870+
// hashlib
871+
// ---------------------------------------------------------------------------
872+
/** Gets a call to `hashlib.new` with `algorithmName` as the algorithm name, passed either as the first positional argument or as the `name` keyword argument. */
873+
private DataFlow::CallCfgNode hashlibNewCall(string algorithmName) {
874+
exists(DataFlow::Node nameArg |
875+
result = API::moduleImport("hashlib").getMember("new").getACall() and
876+
nameArg in [result.getArg(0), result.getArgByName("name")] and
877+
exists(StrConst str |
878+
DataFlow::exprNode(str).(DataFlow::LocalSourceNode).flowsTo(nameArg) and
879+
algorithmName = str.getText()
880+
)
881+
)
882+
}
883+
884+
/** Gets a reference to the result of calling `hashlib.new` with `algorithmName` as the first argument. */
885+
private DataFlow::LocalSourceNode hashlibNewResult(DataFlow::TypeTracker t, string algorithmName) {
886+
t.start() and
887+
result = hashlibNewCall(algorithmName)
888+
or
889+
// Due to bad performance when using normal setup with `hashlibNewResult(t2, algorithmName).track(t2, t)`
890+
// we have inlined that code and forced a join
891+
exists(DataFlow::TypeTracker t2 |
892+
exists(DataFlow::StepSummary summary |
893+
hashlibNewResult_first_join(t2, algorithmName, result, summary) and
894+
t = t2.append(summary)
895+
)
896+
)
897+
}
898+
899+
pragma[nomagic]
900+
private predicate hashlibNewResult_first_join(
901+
DataFlow::TypeTracker t2, string algorithmName, DataFlow::Node res, DataFlow::StepSummary summary
902+
) {
903+
DataFlow::StepSummary::step(hashlibNewResult(t2, algorithmName), res, summary)
904+
}
905+
906+
/** Gets a reference to the result of calling `hashlib.new` with `algorithmName` as the first argument. */
907+
DataFlow::Node hashlibNewResult(string algorithmName) {
908+
hashlibNewResult(DataFlow::TypeTracker::end(), algorithmName).flowsTo(result)
909+
}
910+
911+
/**
912+
* A hashing operation by supplying initial data when calling the `hashlib.new` function.
913+
*/
914+
class HashlibNewCall extends Cryptography::CryptographicOperation::Range, DataFlow::CallCfgNode {
915+
string hashName;
916+
917+
HashlibNewCall() {
918+
this = hashlibNewCall(hashName) and
919+
exists([this.getArg(1), this.getArgByName("data")])
920+
}
921+
922+
override Cryptography::CryptographicAlgorithm getAlgorithm() { result.matchesName(hashName) }
923+
924+
override DataFlow::Node getAnInput() { result in [this.getArg(1), this.getArgByName("data")] }
925+
}
926+
927+
/**
928+
* A hashing operation by using the `update` method on the result of calling the `hashlib.new` function.
929+
*/
930+
class HashlibNewUpdateCall extends Cryptography::CryptographicOperation::Range,
931+
DataFlow::CallCfgNode {
932+
string hashName;
933+
934+
HashlibNewUpdateCall() {
935+
exists(DataFlow::AttrRead attr |
936+
attr.getObject() = hashlibNewResult(hashName) and
937+
this.getFunction() = attr and
938+
attr.getAttributeName() = "update"
939+
)
940+
}
941+
942+
override Cryptography::CryptographicAlgorithm getAlgorithm() { result.matchesName(hashName) }
943+
944+
override DataFlow::Node getAnInput() { result = this.getArg(0) }
945+
}
946+
947+
/**
948+
* A hashing operation from the `hashlib` package using one of the predefined classes
949+
* (such as `hashlib.md5`). `hashlib.new` is not included, since it is handled by
950+
* `HashlibNewCall` and `HashlibNewUpdateCall`.
951+
*/
952+
abstract class HashlibGenericHashOperation extends Cryptography::CryptographicOperation::Range,
953+
DataFlow::CallCfgNode {
954+
string hashName;
955+
API::Node hashClass;
956+
957+
bindingset[this]
958+
HashlibGenericHashOperation() {
959+
not hashName = "new" and
960+
hashClass = API::moduleImport("hashlib").getMember(hashName)
961+
}
962+
963+
override Cryptography::CryptographicAlgorithm getAlgorithm() { result.matchesName(hashName) }
964+
}
965+
966+
/**
967+
* A hashing operation from the `hashlib` package using one of the predefined classes
968+
* (such as `hashlib.md5`), by calling its `update` method.
969+
*/
970+
class HashlibHashClassUpdateCall extends HashlibGenericHashOperation {
971+
HashlibHashClassUpdateCall() { this = hashClass.getReturn().getMember("update").getACall() }
972+
973+
override DataFlow::Node getAnInput() { result = this.getArg(0) }
974+
}
975+
976+
/**
977+
* A hashing operation from the `hashlib` package using one of the predefined classes
978+
* (such as `hashlib.md5`), by passing data when instantiating the class.
979+
*/
980+
class HashlibDataPassedToHashClass extends HashlibGenericHashOperation {
981+
HashlibDataPassedToHashClass() {
982+
// we only want to model calls to classes such as `hashlib.md5()` if initial data
983+
// is passed as an argument
984+
this = hashClass.getACall() and
985+
exists([this.getArg(0), this.getArgByName("string")])
986+
}
987+
988+
override DataFlow::Node getAnInput() {
989+
result = this.getArg(0)
990+
or
991+
// in Python 3.9, you are allowed to use `hashlib.md5(string=<bytes-like>)`.
992+
result = this.getArgByName("string")
993+
}
994+
}
995+
869996
// ---------------------------------------------------------------------------
870997
// OTHER
871998
// ---------------------------------------------------------------------------
Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,29 +1,29 @@
11
import hashlib
22

33

4-
hasher = hashlib.md5(b"secret message") # $ MISSING: CryptographicOperation CryptographicOperationInput=b"secret message" CryptographicOperationAlgorithm=MD5
4+
hasher = hashlib.md5(b"secret message") # $ CryptographicOperation CryptographicOperationInput=b"secret message" CryptographicOperationAlgorithm=MD5
55
print(hasher.hexdigest())
66

77

8-
hasher = hashlib.md5(string=b"secret message") # $ MISSING: CryptographicOperation CryptographicOperationInput=b"secret message" CryptographicOperationAlgorithm=MD5
8+
hasher = hashlib.md5(string=b"secret message") # $ CryptographicOperation CryptographicOperationInput=b"secret message" CryptographicOperationAlgorithm=MD5
99
print(hasher.hexdigest())
1010

1111

1212
hasher = hashlib.md5()
13-
hasher.update(b"secret") # $ MISSING: CryptographicOperation CryptographicOperationInput=b"secret" CryptographicOperationAlgorithm=MD5
14-
hasher.update(b" message") # $ MISSING: CryptographicOperation CryptographicOperationInput=b" message" CryptographicOperationAlgorithm=MD5
13+
hasher.update(b"secret") # $ CryptographicOperation CryptographicOperationInput=b"secret" CryptographicOperationAlgorithm=MD5
14+
hasher.update(b" message") # $ CryptographicOperation CryptographicOperationInput=b" message" CryptographicOperationAlgorithm=MD5
1515
print(hasher.hexdigest())
1616

1717

18-
hasher = hashlib.new('md5', b"secret message") # $ MISSING: CryptographicOperation CryptographicOperationInput=b"secret message" CryptographicOperationAlgorithm=MD5
18+
hasher = hashlib.new('md5', b"secret message") # $ CryptographicOperation CryptographicOperationInput=b"secret message" CryptographicOperationAlgorithm=MD5
1919
print(hasher.hexdigest())
2020

2121

22-
hasher = hashlib.new('md5', data=b"secret message") # $ MISSING: CryptographicOperation CryptographicOperationInput=b"secret message" CryptographicOperationAlgorithm=MD5
22+
hasher = hashlib.new('md5', data=b"secret message") # $ CryptographicOperation CryptographicOperationInput=b"secret message" CryptographicOperationAlgorithm=MD5
2323
print(hasher.hexdigest())
2424

2525

2626
hasher = hashlib.new('md5')
27-
hasher.update(b"secret") # $ MISSING: CryptographicOperation CryptographicOperationInput=b"secret" CryptographicOperationAlgorithm=MD5
28-
hasher.update(b" message") # $ MISSING: CryptographicOperation CryptographicOperationInput=b" message" CryptographicOperationAlgorithm=MD5
27+
hasher.update(b"secret") # $ CryptographicOperation CryptographicOperationInput=b"secret" CryptographicOperationAlgorithm=MD5
28+
hasher.update(b" message") # $ CryptographicOperation CryptographicOperationInput=b" message" CryptographicOperationAlgorithm=MD5
2929
print(hasher.hexdigest())

0 commit comments

Comments
 (0)