Skip to content

Commit e97cfdb

Browse files
committed
Switch to use-use dataflow. This will make post-update nodes easy to implement.
Queries / tests that required changes: * The CleartextLogging and MissingErrorCheck queries are updated because they assumed def-use flow * The CommandInjection query works around the shortcomings of use-use flow by essentially reintroducing def-use flow when it applies a sanitizer * The OpenUrlRedirect query currently just accepts its fate; the tests are updated to avoid excess sanitization while the query comments on the problem. We should choose this approach or the CommandInjection one.
1 parent de87dd5 commit e97cfdb

File tree

16 files changed

+605
-315
lines changed

16 files changed

+605
-315
lines changed

go/ql/lib/semmle/go/dataflow/SSA.qll

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,8 @@ class SsaExplicitDefinition extends SsaDefinition, TExplicitDef {
187187
) {
188188
this.getInstruction().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
189189
}
190+
191+
IR::Instruction getAFirstUse() { firstUse(this, result) }
190192
}
191193

192194
/** Provides a helper predicate for working with explicit SSA definitions. */
@@ -409,3 +411,5 @@ DataFlow::Node getASimilarReadNode(DataFlow::Node node) {
409411
result = readFields.similar().getAUse()
410412
)
411413
}
414+
415+
IR::Instruction getAnAdjacentUse(IR::Instruction pred) { adjacentUseUse(pred, result) }

go/ql/lib/semmle/go/dataflow/SsaImpl.qll

Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,8 @@ private module Internal {
199199
/**
200200
* Holds if the `i`th node of `bb` is a use or an SSA definition of variable `v`, with
201201
* `k` indicating whether it is the former or the latter.
202+
*
203+
* Note this includes phi nodes, whereas `ref` above only includes explicit writes and captures.
202204
*/
203205
private predicate ssaRef(ReachableBasicBlock bb, int i, SsaSourceVariable v, RefKind k) {
204206
useAt(bb, i, v) and k = ReadRef()
@@ -290,6 +292,145 @@ private module Internal {
290292
or
291293
rewindReads(bb, i, v) = 1 and result = getDefReachingEndOf(bb.getImmediateDominator(), v)
292294
}
295+
296+
private module AdjacentUsesImpl {
297+
/** Holds if `v` is defined or used in `b`. */
298+
private predicate varOccursInBlock(SsaSourceVariable v, ReachableBasicBlock b) {
299+
ssaRef(b, _, v, _)
300+
}
301+
302+
/** Holds if `v` occurs in `b` or one of `b`'s transitive successors. */
303+
private predicate blockPrecedesVar(SsaSourceVariable v, ReachableBasicBlock b) {
304+
varOccursInBlock(v, b)
305+
or
306+
exists(getDefReachingEndOf(b, v))
307+
}
308+
309+
/**
310+
* Holds if `b2` is a transitive successor of `b1` and `v` occurs in `b1` and
311+
* in `b2` or one of its transitive successors but not in any block on the path
312+
* between `b1` and `b2`.
313+
*/
314+
private predicate varBlockReaches(
315+
SsaSourceVariable v, ReachableBasicBlock b1, ReachableBasicBlock b2
316+
) {
317+
varOccursInBlock(v, b1) and
318+
b2 = b1.getASuccessor() and
319+
blockPrecedesVar(v, b2)
320+
or
321+
exists(ReachableBasicBlock mid |
322+
varBlockReaches(v, b1, mid) and
323+
b2 = mid.getASuccessor() and
324+
not varOccursInBlock(v, mid) and
325+
blockPrecedesVar(v, b2)
326+
)
327+
}
328+
329+
/**
330+
* Holds if `b2` is a transitive successor of `b1` and `v` occurs in `b1` and
331+
* `b2` but not in any block on the path between `b1` and `b2`.
332+
*/
333+
private predicate varBlockStep(
334+
SsaSourceVariable v, ReachableBasicBlock b1, ReachableBasicBlock b2
335+
) {
336+
varBlockReaches(v, b1, b2) and
337+
varOccursInBlock(v, b2)
338+
}
339+
340+
/**
341+
* Gets the maximum rank among all SSA references to `v` in basic block `bb`.
342+
*/
343+
private int maxSsaRefRank(ReachableBasicBlock bb, SsaSourceVariable v) {
344+
result = max(ssaRefRank(bb, _, v, _))
345+
}
346+
347+
/**
348+
* Holds if `v` occurs at index `i1` in `b1` and at index `i2` in `b2` and
349+
* there is a path between them without any occurrence of `v`.
350+
*/
351+
pragma[nomagic]
352+
predicate adjacentVarRefs(
353+
SsaSourceVariable v, ReachableBasicBlock b1, int i1, ReachableBasicBlock b2, int i2
354+
) {
355+
exists(int rankix |
356+
b1 = b2 and
357+
ssaRefRank(b1, i1, v, _) = rankix and
358+
ssaRefRank(b2, i2, v, _) = rankix + 1
359+
)
360+
or
361+
maxSsaRefRank(b1, v) = ssaRefRank(b1, i1, v, _) and
362+
varBlockStep(v, b1, b2) and
363+
ssaRefRank(b2, i2, v, _) = 1
364+
}
365+
366+
predicate variableUse(SsaSourceVariable v, IR::Instruction use, ReachableBasicBlock bb, int i) {
367+
bb.getNode(i) = use and
368+
exists(SsaVariable sv |
369+
sv.getSourceVariable() = v and
370+
use = sv.getAUse()
371+
)
372+
}
373+
}
374+
375+
private import AdjacentUsesImpl
376+
377+
/**
378+
* Holds if the value defined at `def` can reach `use` without passing through
379+
* any other uses, but possibly through phi nodes.
380+
*/
381+
cached
382+
predicate firstUse(SsaDefinition def, IR::Instruction use) {
383+
exists(SsaSourceVariable v, ReachableBasicBlock b1, int i1, ReachableBasicBlock b2, int i2 |
384+
adjacentVarRefs(v, b1, i1, b2, i2) and
385+
def.definesAt(b1, i1, v) and
386+
variableUse(v, use, b2, i2)
387+
)
388+
or
389+
exists(
390+
SsaSourceVariable v, SsaPhiNode redef, ReachableBasicBlock b1, int i1, ReachableBasicBlock b2,
391+
int i2
392+
|
393+
adjacentVarRefs(v, b1, i1, b2, i2) and
394+
def.definesAt(b1, i1, v) and
395+
redef.definesAt(b2, i2, v) and
396+
firstUse(redef, use)
397+
)
398+
}
399+
400+
/**
401+
* Holds if `use1` and `use2` form an adjacent use-use-pair of the same SSA
402+
* variable, that is, the value read in `use1` can reach `use2` without passing
403+
* through any other use or any SSA definition of the variable.
404+
*/
405+
cached
406+
predicate adjacentUseUseSameVar(IR::Instruction use1, IR::Instruction use2) {
407+
exists(SsaSourceVariable v, ReachableBasicBlock b1, int i1, ReachableBasicBlock b2, int i2 |
408+
adjacentVarRefs(v, b1, i1, b2, i2) and
409+
variableUse(v, use1, b1, i1) and
410+
variableUse(v, use2, b2, i2)
411+
)
412+
}
413+
414+
/**
415+
* Holds if `use1` and `use2` form an adjacent use-use-pair of the same
416+
* `SsaSourceVariable`, that is, the value read in `use1` can reach `use2`
417+
* without passing through any other use or any SSA definition of the variable
418+
* except for phi nodes and uncertain implicit updates.
419+
*/
420+
cached
421+
predicate adjacentUseUse(IR::Instruction use1, IR::Instruction use2) {
422+
adjacentUseUseSameVar(use1, use2)
423+
or
424+
exists(
425+
SsaSourceVariable v, SsaPhiNode def, ReachableBasicBlock b1, int i1, ReachableBasicBlock b2,
426+
int i2
427+
|
428+
adjacentVarRefs(v, b1, i1, b2, i2) and
429+
variableUse(v, use1, b1, i1) and
430+
def.definesAt(b2, i2, v) and
431+
firstUse(def, use2)
432+
)
433+
}
293434
}
294435

295436
import Internal

go/ql/lib/semmle/go/dataflow/internal/DataFlowPrivate.qll

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -64,26 +64,36 @@ predicate basicLocalFlowStep(Node nodeFrom, Node nodeTo) {
6464
else nodeTo.asInstruction() = evalAssert
6565
)
6666
or
67-
// Instruction -> SSA
67+
// Instruction -> SSA defn
6868
exists(IR::Instruction pred, SsaExplicitDefinition succ |
6969
succ.getRhs() = pred and
7070
nodeFrom = instructionNode(pred) and
7171
nodeTo = ssaNode(succ)
7272
)
7373
or
74-
// SSA -> SSA
75-
exists(SsaDefinition pred, SsaPseudoDefinition succ | succ.getAnInput() = pred |
74+
// SSA defn -> SSA capture
75+
exists(SsaExplicitDefinition pred, SsaVariableCapture succ |
76+
// Check: should these flow from PHIs as well? Perhaps they should be included
77+
// in the use-use graph?
78+
succ.(SsaVariableCapture).getSourceVariable() = pred.(SsaExplicitDefinition).getSourceVariable()
79+
|
7680
nodeFrom = ssaNode(pred) and
7781
nodeTo = ssaNode(succ)
7882
)
7983
or
80-
// SSA -> Instruction
81-
exists(SsaDefinition pred, IR::Instruction succ |
82-
succ = pred.getVariable().getAUse() and
84+
// SSA defn -> first SSA use
85+
exists(SsaExplicitDefinition pred, IR::Instruction succ | succ = pred.getAFirstUse() |
8386
nodeFrom = ssaNode(pred) and
8487
nodeTo = instructionNode(succ)
8588
)
8689
or
90+
// SSA use -> successive SSA use
91+
// Note this case includes Phi node traversal
92+
exists(IR::Instruction pred, IR::Instruction succ | succ = getAnAdjacentUse(pred) |
93+
nodeFrom = instructionNode(pred) and
94+
nodeTo = instructionNode(succ)
95+
)
96+
or
8797
// GlobalFunctionNode -> use
8898
nodeFrom =
8999
any(GlobalFunctionNode fn | fn.getFunction() = nodeTo.asExpr().(FunctionName).getTarget())

go/ql/lib/semmle/go/security/CleartextLoggingCustomizations.qll

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,8 @@ module CleartextLogging {
5555
|
5656
this.asExpr().(Ident).getName() = name
5757
or
58+
this.(DataFlow::SsaNode).getSourceVariable().getName() = name
59+
or
5860
this.(DataFlow::FieldReadNode).getFieldName() = name
5961
or
6062
this.(DataFlow::CallNode).getCalleeName() = name
@@ -143,7 +145,7 @@ module CleartextLogging {
143145
not this instanceof NonCleartextPassword and
144146
name.regexpMatch(maybePassword()) and
145147
(
146-
this.asExpr().(Ident).getName() = name
148+
this.(DataFlow::SsaNode).getSourceVariable().getName() = name
147149
or
148150
exists(DataFlow::FieldReadNode fn |
149151
fn = this and

go/ql/lib/semmle/go/security/CommandInjection.qll

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,28 @@ module CommandInjection {
123123
node instanceof Sanitizer or
124124
node = any(ArgumentArrayWithDoubleDash array).getASanitizedElement()
125125
}
126+
127+
// Hack: with use-use flow, we might have x (use at line 1) -> x (use at line 2),
128+
// x (use at line 1) -> array at line 1 and x (use at line 2) -> array at line 2,
129+
// in the context
130+
//
131+
// array1 := {"--", x}
132+
// array2 := {x, "--"}
133+
//
134+
// We want to taint array2 but not array1, which suggests excluding the edge x (use 1) -> array1
135+
// However isSanitizer only allows us to remove nodes (isSanitizerIn/Out permit removing all outgoing
136+
// or incoming edges); we can't remove an individual edge, so instead we supply extra edges connecting
137+
// the definition with the next use.
138+
predicate isAdditionalFlowStep(DataFlow::Node pred, DataFlow::Node succ) {
139+
exists(
140+
ArgumentArrayWithDoubleDash array, DataFlow::InstructionNode sanitized,
141+
DataFlow::SsaNode defn
142+
|
143+
sanitized = array.getASanitizedElement() and sanitized = defn.getAUse()
144+
|
145+
pred = defn and succ = sanitized.getASuccessor()
146+
)
147+
}
126148
}
127149

128150
/**

go/ql/lib/semmle/go/security/OpenUrlRedirect.qll

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,11 @@ module OpenUrlRedirect {
5858
w.writesField(node.getASuccessor(), f, _)
5959
)
6060
or
61+
// Note this blocks other outgoing edges from this node too, so it will
62+
// cause false negatives in combination with use-use flow as subsequent
63+
// uses are incorrectly sanitized. Noting the other end of the
64+
// sanitizing edge as a BarrierIn has the opposite problem, incorrectly
65+
// rejecting taint from the other side of a concatenation for example.
6166
hostnameSanitizingPrefixEdge(node, _)
6267
}
6368
}

go/ql/src/InconsistentCode/MissingErrorCheck.ql

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,16 @@ predicate checksValue(IR::Instruction instruction, DataFlow::SsaNode value) {
7272
)
7373
}
7474

75+
// Now that we have use-use flow, phi nodes aren't directly involved in the flow graph. TODO: change this?
76+
DataFlow::SsaNode phiDefinedFrom(DataFlow::SsaNode node) {
77+
result.getDefinition().(SsaPseudoDefinition).getAnInput() = node.getDefinition().getVariable()
78+
}
79+
80+
DataFlow::SsaNode definedFrom(DataFlow::SsaNode node) {
81+
DataFlow::localFlow(node, result) or
82+
result = phiDefinedFrom*(node)
83+
}
84+
7585
/**
7686
* Matches if `call` is a function returning (`ptr`, `err`) where `ptr` may be nil, and neither
7787
* `ptr` not `err` has been checked for validity as of `node`.
@@ -98,7 +108,7 @@ predicate returnUncheckedAtNode(
98108
// localFlow is used to permit checks via either an SSA phi node or ordinary assignment.
99109
returnUncheckedAtNode(call, node.getAPredecessor(), ptr, err) and
100110
not exists(DataFlow::SsaNode checked |
101-
DataFlow::localFlow(ptr, checked) or DataFlow::localFlow(err, checked)
111+
checked = definedFrom(ptr) or checked = definedFrom(err)
102112
|
103113
checksValue(node, checked)
104114
)

0 commit comments

Comments
 (0)