12
12
import python
13
13
import semmle.python.dataflow.TaintTracking
14
14
import semmle.python.web.HttpRequest
15
-
16
- /**
17
- * Provides heuristics for identifying names related to sensitive information.
18
- *
19
- * INTERNAL: Do not use directly.
20
- * This is copied from the JavaScript library, but should be language independent.
21
- */
22
- private module HeuristicNames {
23
- /**
24
- * Gets a regular expression that identifies strings that may indicate the presence of secret
25
- * or trusted data. The negative lookbehinds skip names such as `issecret`, `untrusted` and `istrusted`.
26
- */
27
- string maybeSecret ( ) { result = "(?is).*((?<!is)secret|(?<!un|is)trusted).*" }
28
-
29
- /**
30
- * Gets a regular expression that identifies strings that may indicate the presence of
31
- * user names or other account information.
32
- */
33
- string maybeAccountInfo ( ) {
34
- result = "(?is).*acc(ou)?nt.*" or
35
- result = "(?is).*(puid|username|userid).*"
36
- }
37
-
38
- /**
39
- * Gets a regular expression that identifies strings that may indicate the presence of
40
- * a password or an authorization key. Names in which `question` follows the password word are excluded.
41
- */
42
- string maybePassword ( ) {
43
- result = "(?is).*pass(wd|word|code|phrase)(?!.*question).*" or
44
- result = "(?is).*(auth(entication|ori[sz]ation)?)key.*"
45
- }
46
-
47
- /**
48
- * Gets a regular expression that identifies strings that may indicate the presence of
49
- * a certificate. Names in which `format` or `name` follows `cert` are excluded.
50
- */
51
- string maybeCertificate ( ) { result = "(?is).*(cert)(?!.*(format|name)).*" }
52
-
53
- /**
54
- * Gets a regular expression that identifies strings that may indicate the presence
55
- * of sensitive data, with `data` describing the kind of sensitive data involved.
56
- */
57
- string maybeSensitive ( SensitiveData data ) {
58
- result = maybeSecret ( ) and data instanceof SensitiveData:: Secret
59
- or
60
- result = maybeAccountInfo ( ) and data instanceof SensitiveData:: Id
61
- or
62
- result = maybePassword ( ) and data instanceof SensitiveData:: Password
63
- or
64
- result = maybeCertificate ( ) and data instanceof SensitiveData:: Certificate
65
- }
66
-
67
- /**
68
- * Gets a regular expression that identifies strings that may indicate the presence of data
69
- * that is redacted, censored, obfuscated, hashed, encrypted or encoded, and hence rendered non-sensitive.
70
- */
71
- string notSensitive ( ) {
72
- result = "(?is).*(redact|censor|obfuscate|hash|md5|sha|((?<!un)(en))?(crypt|code)).*"
73
- }
74
-
75
- bindingset [ name]
76
- SensitiveData getSensitiveDataForName ( string name ) { // gets each kind whose `maybeSensitive` pattern `name` matches, unless `name` also matches `notSensitive()`
77
- name .regexpMatch ( HeuristicNames:: maybeSensitive ( result ) ) and
78
- not name .regexpMatch ( HeuristicNames:: notSensitive ( ) )
79
- }
80
- }
15
+ import semmle.python.security.internal.SensitiveDataHeuristics
16
+ private import HeuristicNames
81
17
82
18
/**
 * A taint kind representing sensitive data. The class is abstract; the concrete kinds
 * visible in this file are the subclasses declared in the `SensitiveData` module.
 */
abstract class SensitiveData extends TaintKind {
83
19
bindingset [ this ]
84
20
SensitiveData ( ) { this = this } // adds no restriction of its own on `this`
21
+
22
+ /** Gets the classification of this sensitive data taint kind. */
23
+ abstract SensitiveDataClassification getClassification ( ) ;
85
24
}
86
25
87
26
module SensitiveData {
88
27
/** The sensitive-data taint kind for secrets: the string "sensitive.data.secret", classified as `SensitiveDataClassification::secret()`. */
class Secret extends SensitiveData {
89
28
Secret ( ) { this = "sensitive.data.secret" }
90
29
91
30
override string repr ( ) { result = "a secret" }
31
+
32
+ override SensitiveDataClassification getClassification ( ) {
33
+ result = SensitiveDataClassification:: secret ( )
34
+ }
92
35
}
93
36
94
37
/** The sensitive-data taint kind for IDs: the string "sensitive.data.id", classified as `SensitiveDataClassification::id()`. */
class Id extends SensitiveData {
95
38
Id ( ) { this = "sensitive.data.id" }
96
39
97
40
override string repr ( ) { result = "an ID" }
41
+
42
+ override SensitiveDataClassification getClassification ( ) {
43
+ result = SensitiveDataClassification:: id ( )
44
+ }
98
45
}
99
46
100
47
/** The sensitive-data taint kind for passwords: the string "sensitive.data.password", classified as `SensitiveDataClassification::password()`. */
class Password extends SensitiveData {
101
48
Password ( ) { this = "sensitive.data.password" }
102
49
103
50
override string repr ( ) { result = "a password" }
51
+
52
+ override SensitiveDataClassification getClassification ( ) {
53
+ result = SensitiveDataClassification:: password ( )
54
+ }
104
55
}
105
56
106
57
/** The sensitive-data taint kind for certificates or keys: the string "sensitive.data.certificate", classified as `SensitiveDataClassification::certificate()`. */
class Certificate extends SensitiveData {
107
58
Certificate ( ) { this = "sensitive.data.certificate" }
108
59
109
60
override string repr ( ) { result = "a certificate or key" }
61
+
62
+ override SensitiveDataClassification getClassification ( ) {
63
+ result = SensitiveDataClassification:: certificate ( )
64
+ }
110
65
}
111
66
112
67
/**
 * Gets a sensitive-data kind associated with the name of `func`.
 * The removed line looked the name up with `HeuristicNames::getSensitiveDataForName`; the added
 * line instead relates the name to the kind's classification through `nameIndicatesSensitiveData`,
 * which is not defined in this file (presumably it comes from the imported
 * `SensitiveDataHeuristics` library; confirm there for its exact matching rules).
 */
private SensitiveData fromFunction ( Value func ) {
113
- result = HeuristicNames :: getSensitiveDataForName ( func .getName ( ) )
68
+ nameIndicatesSensitiveData ( func .getName ( ) , result . getClassification ( ) )
114
69
}
115
70
116
71
abstract class Source extends TaintSource {
@@ -134,7 +89,7 @@ module SensitiveData {
134
89
SensitiveData data ;
135
90
136
91
/**
 * Restricts this class to attribute nodes (`AttrNode`) whose attribute name is related to the
 * field `data`. The removed line bound `data` via `HeuristicNames::getSensitiveDataForName`; the
 * added line relates the name to `data.getClassification()` through `nameIndicatesSensitiveData`
 * (defined outside this file; confirm its semantics in the heuristics library).
 */
SensitiveVariableAccess ( ) {
137
- data = HeuristicNames :: getSensitiveDataForName ( this .( AttrNode ) .getName ( ) )
92
+ nameIndicatesSensitiveData ( this .( AttrNode ) .getName ( ) , data . getClassification ( ) )
138
93
}
139
94
140
95
/** Holds if `kind` is the sensitive-data kind held in the field `data`. */
override predicate isSourceOf ( TaintKind kind ) { kind = data }
@@ -149,7 +104,7 @@ module SensitiveData {
149
104
this .( CallNode ) .getFunction ( ) .( AttrNode ) .getName ( ) = "get" and
150
105
exists ( StringValue sensitive |
151
106
this .( CallNode ) .getAnArg ( ) .pointsTo ( sensitive ) and
152
- data = HeuristicNames :: getSensitiveDataForName ( sensitive .getText ( ) )
107
+ nameIndicatesSensitiveData ( sensitive .getText ( ) , data . getClassification ( ) )
153
108
)
154
109
}
155
110
0 commit comments