Skip to content

Commit 8d844e8

Browse files
committed
Switch to use-use dataflow. This will make post-update nodes easy to implement.
Queries / tests that required changes: * The CleartextLogging and MissingErrorCheck queries are updated because they assumed def-use flow * The CommandInjection query works around the shortcomings of use-use flow by essentially reintroducing def-use flow when it applies a sanitizer * The OpenUrlRedirect query currently just accepts its fate; the tests are updated to avoid excess sanitization while the query comments on the problem. We should choose this approach or the CommandInjection one.
1 parent 0d1865d commit 8d844e8

File tree

16 files changed

+776
-353
lines changed

16 files changed

+776
-353
lines changed

go/ql/lib/semmle/go/dataflow/SSA.qll

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,8 @@ class SsaExplicitDefinition extends SsaDefinition, TExplicitDef {
197197
override string prettyPrintDef() { result = "definition of " + this.getSourceVariable() }
198198

199199
override Location getLocation() { result = this.getInstruction().getLocation() }
200+
201+
IR::Instruction getAFirstUse() { firstUse(this, result) }
200202
}
201203

202204
/** Provides a helper predicate for working with explicit SSA definitions. */
@@ -410,3 +412,5 @@ DataFlow::Node getASimilarReadNode(DataFlow::Node node) {
410412
result = readFields.similar().getAUse()
411413
)
412414
}
415+
416+
IR::Instruction getAnAdjacentUse(IR::Instruction pred) { adjacentUseUse(pred, result) }

go/ql/lib/semmle/go/dataflow/SsaImpl.qll

Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,8 @@ private module Internal {
199199
/**
200200
* Holds if the `i`th node of `bb` is a use or an SSA definition of variable `v`, with
201201
* `k` indicating whether it is the former or the latter.
202+
*
203+
* Note this includes phi nodes, whereas `ref` above only includes explicit writes and captures.
202204
*/
203205
private predicate ssaRef(ReachableBasicBlock bb, int i, SsaSourceVariable v, RefKind k) {
204206
useAt(bb, i, v) and k = ReadRef()
@@ -290,6 +292,145 @@ private module Internal {
290292
or
291293
rewindReads(bb, i, v) = 1 and result = getDefReachingEndOf(bb.getImmediateDominator(), v)
292294
}
295+
296+
private module AdjacentUsesImpl {
297+
/** Holds if `v` is defined or used in `b`. */
298+
private predicate varOccursInBlock(SsaSourceVariable v, ReachableBasicBlock b) {
299+
ssaRef(b, _, v, _)
300+
}
301+
302+
/** Holds if `v` occurs in `b` or one of `b`'s transitive successors. */
303+
private predicate blockPrecedesVar(SsaSourceVariable v, ReachableBasicBlock b) {
304+
varOccursInBlock(v, b)
305+
or
306+
exists(getDefReachingEndOf(b, v))
307+
}
308+
309+
/**
310+
* Holds if `b2` is a transitive successor of `b1` and `v` occurs in `b1` and
311+
* in `b2` or one of its transitive successors but not in any block on the path
312+
* between `b1` and `b2`.
313+
*/
314+
private predicate varBlockReaches(
315+
SsaSourceVariable v, ReachableBasicBlock b1, ReachableBasicBlock b2
316+
) {
317+
varOccursInBlock(v, b1) and
318+
b2 = b1.getASuccessor() and
319+
blockPrecedesVar(v, b2)
320+
or
321+
exists(ReachableBasicBlock mid |
322+
varBlockReaches(v, b1, mid) and
323+
b2 = mid.getASuccessor() and
324+
not varOccursInBlock(v, mid) and
325+
blockPrecedesVar(v, b2)
326+
)
327+
}
328+
329+
/**
330+
* Holds if `b2` is a transitive successor of `b1` and `v` occurs in `b1` and
331+
* `b2` but not in any block on the path between `b1` and `b2`.
332+
*/
333+
private predicate varBlockStep(
334+
SsaSourceVariable v, ReachableBasicBlock b1, ReachableBasicBlock b2
335+
) {
336+
varBlockReaches(v, b1, b2) and
337+
varOccursInBlock(v, b2)
338+
}
339+
340+
/**
341+
* Gets the maximum rank among all SSA references to `v` in basic block `bb`.
342+
*/
343+
private int maxSsaRefRank(ReachableBasicBlock bb, SsaSourceVariable v) {
344+
result = max(ssaRefRank(bb, _, v, _))
345+
}
346+
347+
/**
348+
* Holds if `v` occurs at index `i1` in `b1` and at index `i2` in `b2` and
349+
* there is a path between them without any occurrence of `v`.
350+
*/
351+
pragma[nomagic]
352+
predicate adjacentVarRefs(
353+
SsaSourceVariable v, ReachableBasicBlock b1, int i1, ReachableBasicBlock b2, int i2
354+
) {
355+
exists(int rankix |
356+
b1 = b2 and
357+
ssaRefRank(b1, i1, v, _) = rankix and
358+
ssaRefRank(b2, i2, v, _) = rankix + 1
359+
)
360+
or
361+
maxSsaRefRank(b1, v) = ssaRefRank(b1, i1, v, _) and
362+
varBlockStep(v, b1, b2) and
363+
ssaRefRank(b2, i2, v, _) = 1
364+
}
365+
366+
predicate variableUse(SsaSourceVariable v, IR::Instruction use, ReachableBasicBlock bb, int i) {
367+
bb.getNode(i) = use and
368+
exists(SsaVariable sv |
369+
sv.getSourceVariable() = v and
370+
use = sv.getAUse()
371+
)
372+
}
373+
}
374+
375+
private import AdjacentUsesImpl
376+
377+
/**
378+
* Holds if the value defined at `def` can reach `use` without passing through
379+
* any other uses, but possibly through phi nodes.
380+
*/
381+
cached
382+
predicate firstUse(SsaDefinition def, IR::Instruction use) {
383+
exists(SsaSourceVariable v, ReachableBasicBlock b1, int i1, ReachableBasicBlock b2, int i2 |
384+
adjacentVarRefs(v, b1, i1, b2, i2) and
385+
def.definesAt(b1, i1, v) and
386+
variableUse(v, use, b2, i2)
387+
)
388+
or
389+
exists(
390+
SsaSourceVariable v, SsaPhiNode redef, ReachableBasicBlock b1, int i1, ReachableBasicBlock b2,
391+
int i2
392+
|
393+
adjacentVarRefs(v, b1, i1, b2, i2) and
394+
def.definesAt(b1, i1, v) and
395+
redef.definesAt(b2, i2, v) and
396+
firstUse(redef, use)
397+
)
398+
}
399+
400+
/**
401+
* Holds if `use1` and `use2` form an adjacent use-use-pair of the same SSA
402+
* variable, that is, the value read in `use1` can reach `use2` without passing
403+
* through any other use or any SSA definition of the variable.
404+
*/
405+
cached
406+
predicate adjacentUseUseSameVar(IR::Instruction use1, IR::Instruction use2) {
407+
exists(SsaSourceVariable v, ReachableBasicBlock b1, int i1, ReachableBasicBlock b2, int i2 |
408+
adjacentVarRefs(v, b1, i1, b2, i2) and
409+
variableUse(v, use1, b1, i1) and
410+
variableUse(v, use2, b2, i2)
411+
)
412+
}
413+
414+
/**
415+
* Holds if `use1` and `use2` form an adjacent use-use-pair of the same
416+
* `SsaSourceVariable`, that is, the value read in `use1` can reach `use2`
417+
* without passing through any other use or any SSA definition of the variable
418+
* except for phi nodes and uncertain implicit updates.
419+
*/
420+
cached
421+
predicate adjacentUseUse(IR::Instruction use1, IR::Instruction use2) {
422+
adjacentUseUseSameVar(use1, use2)
423+
or
424+
exists(
425+
SsaSourceVariable v, SsaPhiNode def, ReachableBasicBlock b1, int i1, ReachableBasicBlock b2,
426+
int i2
427+
|
428+
adjacentVarRefs(v, b1, i1, b2, i2) and
429+
variableUse(v, use1, b1, i1) and
430+
def.definesAt(b2, i2, v) and
431+
firstUse(def, use2)
432+
)
433+
}
293434
}
294435

295436
import Internal

go/ql/lib/semmle/go/dataflow/internal/DataFlowPrivate.qll

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -65,26 +65,36 @@ predicate basicLocalFlowStep(Node nodeFrom, Node nodeTo) {
6565
else nodeTo.asInstruction() = evalAssert
6666
)
6767
or
68-
// Instruction -> SSA
68+
// Instruction -> SSA defn
6969
exists(IR::Instruction pred, SsaExplicitDefinition succ |
7070
succ.getRhs() = pred and
7171
nodeFrom = instructionNode(pred) and
7272
nodeTo = ssaNode(succ)
7373
)
7474
or
75-
// SSA -> SSA
76-
exists(SsaDefinition pred, SsaPseudoDefinition succ | succ.getAnInput() = pred |
75+
// SSA defn -> SSA capture
76+
exists(SsaExplicitDefinition pred, SsaVariableCapture succ |
77+
// Check: should these flow from PHIs as well? Perhaps they should be included
78+
// in the use-use graph?
79+
succ.(SsaVariableCapture).getSourceVariable() = pred.(SsaExplicitDefinition).getSourceVariable()
80+
|
7781
nodeFrom = ssaNode(pred) and
7882
nodeTo = ssaNode(succ)
7983
)
8084
or
81-
// SSA -> Instruction
82-
exists(SsaDefinition pred, IR::Instruction succ |
83-
succ = pred.getVariable().getAUse() and
85+
// SSA defn -> first SSA use
86+
exists(SsaExplicitDefinition pred, IR::Instruction succ | succ = pred.getAFirstUse() |
8487
nodeFrom = ssaNode(pred) and
8588
nodeTo = instructionNode(succ)
8689
)
8790
or
91+
// SSA use -> successive SSA use
92+
// Note this case includes Phi node traversal
93+
exists(IR::Instruction pred, IR::Instruction succ | succ = getAnAdjacentUse(pred) |
94+
nodeFrom = instructionNode(pred) and
95+
nodeTo = instructionNode(succ)
96+
)
97+
or
8898
// GlobalFunctionNode -> use
8999
nodeFrom =
90100
any(GlobalFunctionNode fn | fn.getFunction() = nodeTo.asExpr().(FunctionName).getTarget())

go/ql/lib/semmle/go/security/CleartextLoggingCustomizations.qll

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,8 @@ module CleartextLogging {
5555
|
5656
this.asExpr().(Ident).getName() = name
5757
or
58+
this.(DataFlow::SsaNode).getSourceVariable().getName() = name
59+
or
5860
this.(DataFlow::FieldReadNode).getFieldName() = name
5961
or
6062
this.(DataFlow::CallNode).getCalleeName() = name
@@ -143,7 +145,7 @@ module CleartextLogging {
143145
not this instanceof NonCleartextPassword and
144146
name.regexpMatch(maybePassword()) and
145147
(
146-
this.asExpr().(Ident).getName() = name
148+
this.(DataFlow::SsaNode).getSourceVariable().getName() = name
147149
or
148150
exists(DataFlow::FieldReadNode fn |
149151
fn = this and

go/ql/lib/semmle/go/security/CommandInjection.qll

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,28 @@ module CommandInjection {
8080
node instanceof Sanitizer or
8181
node = any(ArgumentArrayWithDoubleDash array).getASanitizedElement()
8282
}
83+
84+
// Hack: with use-use flow, we might have x (use at line 1) -> x (use at line 2),
85+
// x (use at line 1) -> array at line 1 and x (use at line 2) -> array at line 2,
86+
// in the context
87+
//
88+
// array1 := {"--", x}
89+
// array2 := {x, "--"}
90+
//
91+
// We want to taint array2 but not array1, which suggests excluding the edge x (use 1) -> array1
92+
// However isSanitizer only allows us to remove nodes (isSanitizerIn/Out permit removing all outgoing
93+
// or incoming edges); we can't remove an individual edge, so instead we supply extra edges connecting
94+
// the definition with the next use.
95+
predicate isAdditionalFlowStep(DataFlow::Node pred, DataFlow::Node succ) {
96+
exists(
97+
ArgumentArrayWithDoubleDash array, DataFlow::InstructionNode sanitized,
98+
DataFlow::SsaNode defn
99+
|
100+
sanitized = array.getASanitizedElement() and sanitized = defn.getAUse()
101+
|
102+
pred = defn and succ = sanitized.getASuccessor()
103+
)
104+
}
83105
}
84106

85107
/**

go/ql/src/InconsistentCode/MissingErrorCheck.ql

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,16 @@ predicate checksValue(IR::Instruction instruction, DataFlow::SsaNode value) {
7272
)
7373
}
7474

75+
// Now that we have use-use flow, phi nodes aren't directly involved in the flow graph. TODO: change this?
76+
DataFlow::SsaNode phiDefinedFrom(DataFlow::SsaNode node) {
77+
result.getDefinition().(SsaPseudoDefinition).getAnInput() = node.getDefinition().getVariable()
78+
}
79+
80+
DataFlow::SsaNode definedFrom(DataFlow::SsaNode node) {
81+
DataFlow::localFlow(node, result) or
82+
result = phiDefinedFrom*(node)
83+
}
84+
7585
/**
7686
* Matches if `call` is a function returning (`ptr`, `err`) where `ptr` may be nil, and neither
7787
* `ptr` not `err` has been checked for validity as of `node`.
@@ -98,7 +108,7 @@ predicate returnUncheckedAtNode(
98108
// localFlow is used to permit checks via either an SSA phi node or ordinary assignment.
99109
returnUncheckedAtNode(call, node.getAPredecessor(), ptr, err) and
100110
not exists(DataFlow::SsaNode checked |
101-
DataFlow::localFlow(ptr, checked) or DataFlow::localFlow(err, checked)
111+
checked = definedFrom(ptr) or checked = definedFrom(err)
102112
|
103113
checksValue(node, checked)
104114
)

0 commit comments

Comments
 (0)