Skip to content

Commit cc72bfb

Browse files
committed
Swift: Add the shared SensitiveDataHeuristics.qll to Swift.
1 parent 4781881 commit cc72bfb

File tree

2 files changed

+127
-2
lines changed

2 files changed

+127
-2
lines changed

config/identical-files.json

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -512,7 +512,8 @@
512512
"SensitiveDataHeuristics Python/JS": [
513513
"javascript/ql/lib/semmle/javascript/security/internal/SensitiveDataHeuristics.qll",
514514
"python/ql/lib/semmle/python/security/internal/SensitiveDataHeuristics.qll",
515-
"ruby/ql/lib/codeql/ruby/security/internal/SensitiveDataHeuristics.qll"
515+
"ruby/ql/lib/codeql/ruby/security/internal/SensitiveDataHeuristics.qll",
516+
"swift/ql/lib/codeql/swift/security/internal/SensitiveDataHeuristics.qll"
516517
],
517518
"CFG": [
518519
"csharp/ql/lib/semmle/code/csharp/controlflow/internal/ControlFlowGraphImplShared.qll",
@@ -599,4 +600,4 @@
599600
"python/ql/lib/semmle/python/security/internal/EncryptionKeySizes.qll",
600601
"java/ql/lib/semmle/code/java/security/internal/EncryptionKeySizes.qll"
601602
]
602-
}
603+
}
Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
/**
2+
* INTERNAL: Do not use.
3+
*
4+
* Provides classes and predicates for identifying strings that may indicate the presence of sensitive data.
5+
* This logic is shared across our CodeQL analyses of different languages.
6+
*
7+
* 'Sensitive' data in general is anything that should not be sent around in unencrypted form.
8+
*/
9+
10+
/**
11+
* A classification of different kinds of sensitive data:
12+
*
13+
* - secret: generic secret or trusted data;
14+
* - id: a user name or other account information;
15+
* - password: a password or authorization key;
16+
* - certificate: a certificate.
17+
*
18+
* While classifications are represented as strings, this should not be relied upon.
19+
* Instead, use the predicates in `SensitiveDataClassification::` to work with
20+
* classifications.
21+
*/
22+
class SensitiveDataClassification extends string {
23+
SensitiveDataClassification() { this in ["secret", "id", "password", "certificate"] }
24+
}
25+
26+
/**
27+
* Provides predicates to select the different kinds of sensitive data we support.
28+
*/
29+
module SensitiveDataClassification {
30+
/** Gets the classification for secret or trusted data. */
31+
SensitiveDataClassification secret() { result = "secret" }
32+
33+
/** Gets the classification for user names or other account information. */
34+
SensitiveDataClassification id() { result = "id" }
35+
36+
/** Gets the classification for passwords or authorization keys. */
37+
SensitiveDataClassification password() { result = "password" }
38+
39+
/** Gets the classification for certificates. */
40+
SensitiveDataClassification certificate() { result = "certificate" }
41+
}
42+
43+
/**
44+
* INTERNAL: Do not use.
45+
*
46+
* Provides heuristics for identifying names related to sensitive information.
47+
*/
48+
module HeuristicNames {
49+
/**
50+
* Gets a regular expression that identifies strings that may indicate the presence of secret
51+
* or trusted data.
52+
*/
53+
string maybeSecret() { result = "(?is).*((?<!is|is_)secret|(?<!un|un_|is|is_)trusted).*" }
54+
55+
/**
56+
* Gets a regular expression that identifies strings that may indicate the presence of
57+
* user names or other account information.
58+
*/
59+
string maybeAccountInfo() {
60+
result = "(?is).*acc(ou)?nt.*" or
61+
result = "(?is).*(puid|username|userid|session(id|key)).*" or
62+
result = "(?s).*([uU]|^|_|[a-z](?=U))([uU][iI][dD]).*"
63+
}
64+
65+
/**
66+
* Gets a regular expression that identifies strings that may indicate the presence of
67+
* a password or an authorization key.
68+
*/
69+
string maybePassword() {
70+
result = "(?is).*pass(wd|word|code|phrase)(?!.*question).*" or
71+
result = "(?is).*(auth(entication|ori[sz]ation)?)key.*"
72+
}
73+
74+
/**
75+
* Gets a regular expression that identifies strings that may indicate the presence of
76+
* a certificate.
77+
*/
78+
string maybeCertificate() { result = "(?is).*(cert)(?!.*(format|name)).*" }
79+
80+
/**
81+
* Gets a regular expression that identifies strings that may indicate the presence
82+
* of sensitive data, with `classification` describing the kind of sensitive data involved.
83+
*/
84+
string maybeSensitiveRegexp(SensitiveDataClassification classification) {
85+
result = maybeSecret() and classification = SensitiveDataClassification::secret()
86+
or
87+
result = maybeAccountInfo() and classification = SensitiveDataClassification::id()
88+
or
89+
result = maybePassword() and classification = SensitiveDataClassification::password()
90+
or
91+
result = maybeCertificate() and
92+
classification = SensitiveDataClassification::certificate()
93+
}
94+
95+
/**
96+
* Gets a regular expression that identifies strings that may indicate the presence of data
97+
* that is hashed or encrypted, and hence rendered non-sensitive, or contains special characters
98+
* suggesting nouns within the string do not represent the meaning of the whole string (e.g. a URL or a SQL query).
99+
*
100+
* We also filter out common words like `certain` and `concert`, since otherwise these could
101+
* be matched by the certificate regular expression. The same applies to `accountable` (account) and
102+
* `secretarial` (secret).
103+
*/
104+
string notSensitiveRegexp() {
105+
result =
106+
"(?is).*([^\\w$.-]|redact|censor|obfuscate|hash|md5|sha|random|((?<!un)(en))?(crypt|(?<!pass)code)|certain|concert|secretar|accountant|accountab).*"
107+
}
108+
109+
/**
110+
* Holds if `name` may indicate the presence of sensitive data, and
111+
* `name` does not indicate that the data is in fact non-sensitive (for example since
112+
* it is hashed or encrypted). `classification` describes the kind of sensitive data
113+
* involved.
114+
*
115+
* That is, one of the regexps from `maybeSensitiveRegexp` matches `name` (with the
116+
* given classification), and none of the regexps from `notSensitiveRegexp` matches
117+
* `name`.
118+
*/
119+
bindingset[name]
120+
predicate nameIndicatesSensitiveData(string name, SensitiveDataClassification classification) {
121+
name.regexpMatch(maybeSensitiveRegexp(classification)) and
122+
not name.regexpMatch(notSensitiveRegexp())
123+
}
124+
}

0 commit comments

Comments
 (0)