Skip to content

Commit 3370477

Browse files
Merge pull request github#16503 from joefarebrother/ruby-sensitive-sources
Ruby: Use additional sensitive data heuristics for CleartextSources
2 parents cd9d58f + eee7f5a commit 3370477

File tree

8 files changed

+171
-41
lines changed

8 files changed

+171
-41
lines changed
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
---
2+
category: minorAnalysis
3+
---
4+
* The `CleartextSources.qll` library, used by `rb/clear-text-logging-sensitive-data` and `rb/clear-text-logging-sensitive-data`, has been updated to consider heuristics for additional categories of sensitive data.

ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowPrivate.qll

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -725,6 +725,7 @@ private module Cached {
725725
newtype TOptionalContentSet =
726726
TSingletonContent(Content c) or
727727
TAnyElementContent() or
728+
TAnyContent() or
728729
TKnownOrUnknownElementContent(Content::KnownElementContent c) or
729730
TElementLowerBoundContent(int lower, boolean includeUnknown) {
730731
FlowSummaryImpl::ParsePositions::isParsedElementLowerBoundPosition(_, includeUnknown, lower)
@@ -736,7 +737,7 @@ private module Cached {
736737

737738
cached
738739
class TContentSet =
739-
TSingletonContent or TAnyElementContent or TKnownOrUnknownElementContent or
740+
TSingletonContent or TAnyElementContent or TAnyContent or TKnownOrUnknownElementContent or
740741
TElementLowerBoundContent or TElementContentOfTypeContent;
741742

742743
private predicate trackKnownValue(ConstantValue cv) {

ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowPublic.qll

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -689,6 +689,9 @@ class ContentSet extends TContentSet {
689689
/** Holds if this content set represents all `ElementContent`s. */
690690
predicate isAnyElement() { this = TAnyElementContent() }
691691

692+
/** Holds if this content set represents all contents. */
693+
predicate isAny() { this = TAnyContent() }
694+
692695
/**
693696
* Holds if this content set represents a specific known element index, or an
694697
* unknown element index.
@@ -737,6 +740,9 @@ class ContentSet extends TContentSet {
737740
this.isAnyElement() and
738741
result = "any element"
739742
or
743+
this.isAny() and
744+
result = "any"
745+
or
740746
exists(Content::KnownElementContent c |
741747
this.isKnownOrUnknownElement(c) and
742748
result = c + " or unknown"
@@ -790,13 +796,8 @@ class ContentSet extends TContentSet {
790796
result = TUnknownElementContent()
791797
}
792798

793-
/** Gets a content that may be read from when reading from this set. */
794-
Content getAReadContent() {
795-
this.isSingleton(result)
796-
or
797-
this.isAnyElement() and
798-
result instanceof Content::ElementContent
799-
or
799+
pragma[nomagic]
800+
private Content getAnElementReadContent() {
800801
exists(Content::KnownElementContent c | this.isKnownOrUnknownElement(c) |
801802
result = c or
802803
result = TSplatContent(c.getIndex().getInt(), _) or
@@ -832,6 +833,19 @@ class ContentSet extends TContentSet {
832833
result = TUnknownElementContent()
833834
)
834835
}
836+
837+
/** Gets a content that may be read from when reading from this set. */
838+
Content getAReadContent() {
839+
this.isSingleton(result)
840+
or
841+
this.isAnyElement() and
842+
result instanceof Content::ElementContent
843+
or
844+
this.isAny() and
845+
exists(result)
846+
or
847+
result = this.getAnElementReadContent()
848+
}
835849
}
836850

837851
/**

ruby/ql/lib/codeql/ruby/security/CleartextLoggingQuery.qll

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,11 @@ private module Config implements DataFlow::ConfigSig {
4444
predicate isAdditionalFlowStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
4545
CL::isAdditionalTaintStep(nodeFrom, nodeTo)
4646
}
47+
48+
predicate allowImplicitRead(DataFlow::Node node, DataFlow::ContentSet cs) {
49+
cs.isAny() and
50+
isSink(node)
51+
}
4752
}
4853

4954
/**

ruby/ql/lib/codeql/ruby/security/CleartextStorageQuery.qll

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,11 @@ private module Config implements DataFlow::ConfigSig {
4343
predicate isAdditionalFlowStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
4444
CS::isAdditionalTaintStep(nodeFrom, nodeTo)
4545
}
46+
47+
predicate allowImplicitRead(DataFlow::Node node, DataFlow::ContentSet cs) {
48+
cs.isAny() and
49+
isSink(node)
50+
}
4651
}
4752

4853
/**

ruby/ql/lib/codeql/ruby/security/internal/CleartextSources.qll

Lines changed: 67 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ private import codeql.ruby.DataFlow
99
private import codeql.ruby.TaintTracking::TaintTracking
1010
private import codeql.ruby.dataflow.RemoteFlowSources
1111
private import SensitiveDataHeuristics::HeuristicNames
12+
private import SensitiveDataHeuristics
1213
private import codeql.ruby.CFG
1314
private import codeql.ruby.dataflow.SSA
1415

@@ -39,6 +40,34 @@ module CleartextSources {
3940
re.getConstantValue().getStringlikeValue() = [".*", ".+"]
4041
}
4142

43+
/** Holds if `c` is a sensitive data classification that is relevant to consider for Cleartext Storage queries. */
44+
private predicate isRelevantClassification(SensitiveDataClassification c) {
45+
c =
46+
[
47+
SensitiveDataClassification::password(), SensitiveDataClassification::certificate(),
48+
SensitiveDataClassification::secret(), SensitiveDataClassification::private()
49+
]
50+
}
51+
52+
pragma[noinline]
53+
private string getCombinedRelevantSensitiveRegexp() {
54+
// Combine all the maybe-sensitive regexps into one using non-capturing groups and |.
55+
result =
56+
"(?:" +
57+
strictconcat(string r, SensitiveDataClassification c |
58+
r = maybeSensitiveRegexp(c) and isRelevantClassification(c)
59+
|
60+
r, ")|(?:"
61+
) + ")"
62+
}
63+
64+
/** Holds if the given name indicates the presence of sensitive data that is relevant to consider for Cleartext Storage queries. */
65+
bindingset[name]
66+
private predicate nameIndicatesRelevantSensitiveData(string name) {
67+
name.regexpMatch(getCombinedRelevantSensitiveRegexp()) and
68+
not name.regexpMatch(notSensitiveRegexp())
69+
}
70+
4271
/**
4372
* Holds if `re` may be a regular expression that can be used to sanitize
4473
* sensitive data with a call to `gsub`.
@@ -92,17 +121,17 @@ module CleartextSources {
92121
}
93122

94123
/**
95-
* A call that might obfuscate a password, for example through hashing.
124+
* A call that might obfuscate sensitive data, for example through hashing.
96125
*/
97126
private class ObfuscatorCall extends Sanitizer, DataFlow::CallNode {
98127
ObfuscatorCall() { nameIsNotSensitive(this.getMethodName()) }
99128
}
100129

101130
/**
102-
* A data flow node that does not contain a clear-text password, according to its syntactic name.
131+
* A data flow node that does not contain clear-text sensitive data, according to its syntactic name.
103132
*/
104-
private class NameGuidedNonCleartextPassword extends NonCleartextPassword {
105-
NameGuidedNonCleartextPassword() {
133+
private class NameGuidedNonCleartextSensitive extends NonCleartextSensitive {
134+
NameGuidedNonCleartextSensitive() {
106135
exists(string name | nameIsNotSensitive(name) |
107136
// accessing a non-sensitive variable
108137
this.asExpr().getExpr().(VariableReadAccess).getVariable().getName() = name
@@ -129,18 +158,23 @@ module CleartextSources {
129158
}
130159

131160
/**
132-
* A data flow node that receives flow that is not a clear-text password.
161+
* A data flow node that receives flow that is not clear-text sensitive data.
133162
*/
134-
class NonCleartextPasswordFlow extends NonCleartextPassword {
135-
NonCleartextPasswordFlow() {
136-
any(NonCleartextPassword other).(DataFlow::LocalSourceNode).flowsTo(this)
163+
class NonCleartextSensitiveFlow extends NonCleartextSensitive {
164+
NonCleartextSensitiveFlow() {
165+
any(NonCleartextSensitive other).(DataFlow::LocalSourceNode).flowsTo(this)
137166
}
138167
}
139168

140169
/**
141-
* A data flow node that does not contain a clear-text password.
170+
* DEPRECATED: Use NonCleartextSensitiveFlow instead.
171+
*/
172+
deprecated class NonCleartextPasswordFlow = NonCleartextSensitiveFlow;
173+
174+
/**
175+
* A data flow node that does not contain clear-text sensitive data.
142176
*/
143-
abstract private class NonCleartextPassword extends DataFlow::Node { }
177+
abstract private class NonCleartextSensitive extends DataFlow::Node { }
144178

145179
// `writeNode` assigns pair with key `name` to `val`
146180
private predicate hashKeyWrite(DataFlow::CallNode writeNode, string name, DataFlow::Node val) {
@@ -153,18 +187,18 @@ module CleartextSources {
153187
}
154188

155189
/**
156-
* A value written to a hash entry with a key that may contain password information.
190+
* A value written to a hash entry with a key that may contain sensitive information.
157191
*/
158-
private class HashKeyWritePasswordSource extends Source {
192+
private class HashKeyWriteSensitiveSource extends Source {
159193
private string name;
160194
private DataFlow::ExprNode recv;
161195

162-
HashKeyWritePasswordSource() {
196+
HashKeyWriteSensitiveSource() {
163197
exists(DataFlow::CallNode writeNode |
164-
name.regexpMatch(maybePassword()) and
198+
nameIndicatesRelevantSensitiveData(name) and
165199
not nameIsNotSensitive(name) and
166200
// avoid safe values assigned to presumably unsafe names
167-
not this instanceof NonCleartextPassword and
201+
not this instanceof NonCleartextSensitive and
168202
// hash[name] = val
169203
hashKeyWrite(writeNode, name, this) and
170204
recv = writeNode.getReceiver()
@@ -177,7 +211,7 @@ module CleartextSources {
177211
string getName() { result = name }
178212

179213
/**
180-
* Gets the name of the hash variable that this password source is assigned
214+
* Gets the name of the hash variable that this sensitive source is assigned
181215
* to, if applicable.
182216
*/
183217
LocalVariable getVariable() {
@@ -186,17 +220,17 @@ module CleartextSources {
186220
}
187221

188222
/**
189-
* An entry into a hash literal that may contain a password
223+
* An entry into a hash literal that may contain sensitive data
190224
*/
191-
private class HashLiteralPasswordSource extends Source {
225+
private class HashLiteralSensitiveSource extends Source {
192226
private string name;
193227

194-
HashLiteralPasswordSource() {
228+
HashLiteralSensitiveSource() {
195229
exists(CfgNodes::ExprNodes::HashLiteralCfgNode lit |
196-
name.regexpMatch(maybePassword()) and
230+
nameIndicatesRelevantSensitiveData(name) and
197231
not nameIsNotSensitive(name) and
198232
// avoid safe values assigned to presumably unsafe names
199-
not this instanceof NonCleartextPassword and
233+
not this instanceof NonCleartextSensitive and
200234
// hash = { name: val }
201235
exists(CfgNodes::ExprNodes::PairCfgNode p | p = lit.getAKeyValuePair() |
202236
p.getKey().getConstantValue().getStringlikeValue() = name and
@@ -208,14 +242,14 @@ module CleartextSources {
208242
override string describe() { result = "a write to " + name }
209243
}
210244

211-
/** An assignment that may assign a password to a variable */
212-
private class AssignPasswordVariableSource extends Source {
245+
/** An assignment that may assign sensitive data to a variable */
246+
private class AssignSensitiveVariableSource extends Source {
213247
string name;
214248

215-
AssignPasswordVariableSource() {
249+
AssignSensitiveVariableSource() {
216250
// avoid safe values assigned to presumably unsafe names
217-
not this instanceof NonCleartextPassword and
218-
name.regexpMatch(maybePassword()) and
251+
not this instanceof NonCleartextSensitive and
252+
nameIndicatesRelevantSensitiveData(name) and
219253
not nameIsNotSensitive(name) and
220254
exists(Assignment a |
221255
this.asExpr().getExpr() = a.getRightOperand() and
@@ -226,14 +260,14 @@ module CleartextSources {
226260
override string describe() { result = "an assignment to " + name }
227261
}
228262

229-
/** A parameter that may contain a password. */
230-
private class ParameterPasswordSource extends Source {
263+
/** A parameter that may contain sensitive data. */
264+
private class ParameterSensitiveSource extends Source {
231265
private string name;
232266

233-
ParameterPasswordSource() {
234-
name.regexpMatch(maybePassword()) and
267+
ParameterSensitiveSource() {
268+
nameIndicatesRelevantSensitiveData(name) and
235269
not nameIsNotSensitive(name) and
236-
not this instanceof NonCleartextPassword and
270+
not this instanceof NonCleartextSensitive and
237271
exists(Parameter p, LocalVariable v |
238272
v = p.getAVariable() and
239273
v.getName() = name and
@@ -260,10 +294,10 @@ module CleartextSources {
260294
deprecated predicate isAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
261295
exists(string name, ElementReference ref, LocalVariable hashVar |
262296
// from `hsh[password] = "changeme"` to a `hsh[password]` read
263-
nodeFrom.(HashKeyWritePasswordSource).getName() = name and
297+
nodeFrom.(HashKeyWriteSensitiveSource).getName() = name and
264298
nodeTo.asExpr().getExpr() = ref and
265299
ref.getArgument(0).getConstantValue().getStringlikeValue() = name and
266-
nodeFrom.(HashKeyWritePasswordSource).getVariable() = hashVar and
300+
nodeFrom.(HashKeyWriteSensitiveSource).getVariable() = hashVar and
267301
ref.getReceiver().(VariableReadAccess).getVariable() = hashVar and
268302
nodeFrom.asExpr().getASuccessor*() = nodeTo.asExpr()
269303
)

0 commit comments

Comments
 (0)