Skip to content

Commit 5e06043

Browse files
committed
C++: Completely get rid of merged path nodes.
1 parent 2c2f9b9 commit 5e06043

File tree

2 files changed

+230
-826
lines changed

2 files changed

+230
-826
lines changed

cpp/ql/src/experimental/Security/CWE/CWE-193/InvalidPointerDeref.ql

Lines changed: 67 additions & 107 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import semmle.code.cpp.ir.ValueNumbering
2323
import semmle.code.cpp.controlflow.IRGuards
2424
import semmle.code.cpp.ir.IR
2525
import codeql.util.Unit
26+
import FinalFlow::PathGraph
2627

2728
pragma[nomagic]
2829
Instruction getABoundIn(SemBound b, IRFunction func) {
@@ -420,64 +421,22 @@ predicate invalidPointerToDerefSource(
420421
)
421422
}
422423

423-
newtype TMergedPathNode =
424-
// The path from the allocation to the address of the store/load.
425-
TFinalPathNode(FinalFlow::PathNode p) or
426-
// The read/write that uses the invalid pointer identified by `InvalidPointerToDerefConfig`.
427-
// This one is needed because the sink identified by `InvalidPointerToDerefConfig` is the
428-
// pointer, but we want to raise an alert at the dereference.
429-
TPathNodeSink(Instruction i) {
430-
exists(DataFlow::Node n |
431-
InvalidPointerToDerefFlow::flowTo(pragma[only_bind_into](n)) and
432-
isInvalidPointerDerefSink(n, i, _, _) and
433-
i = getASuccessor(n.asInstruction())
434-
)
435-
}
436-
437-
class MergedPathNode extends TMergedPathNode {
438-
string toString() { none() }
439-
440-
final FinalFlow::PathNode asPathNode() { this = TFinalPathNode(result) }
441-
442-
final Instruction asSinkNode() { this = TPathNodeSink(result) }
443-
444-
predicate hasLocationInfo(
445-
string filepath, int startline, int startcolumn, int endline, int endcolumn
446-
) {
447-
none()
448-
}
449-
}
450-
451-
class FinalPathNode extends MergedPathNode, TFinalPathNode {
452-
override string toString() {
453-
exists(FinalFlow::PathNode p |
454-
this = TFinalPathNode(p) and
455-
result = p.toString()
456-
)
457-
}
458-
459-
override predicate hasLocationInfo(
460-
string filepath, int startline, int startcolumn, int endline, int endcolumn
461-
) {
462-
this.asPathNode().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
463-
}
464-
}
465-
466-
class PathSinkNode extends MergedPathNode, TPathNodeSink {
467-
override string toString() {
468-
exists(Instruction i |
469-
this = TPathNodeSink(i) and
470-
result = i.toString()
471-
)
472-
}
473-
474-
override predicate hasLocationInfo(
475-
string filepath, int startline, int startcolumn, int endline, int endcolumn
476-
) {
477-
this.asSinkNode()
478-
.getLocation()
479-
.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
480-
}
424+
/**
425+
* Holds if `derefSink` is a dataflow node that represents an out-of-bounds address that is about to
426+
* be dereferenced by `operation` (which is either a `StoreInstruction` or `LoadInstruction`), and
427+
* `pai` is the pointer-arithmetic operation that caused the `derefSink` to be out-of-bounds.
428+
*/
429+
predicate derefSinkToOperation(
430+
DataFlow::Node derefSink, PointerArithmeticInstruction pai, DataFlow::Node operation
431+
) {
432+
exists(DataFlow::Node source, Instruction i |
433+
InvalidPointerToDerefFlow::flow(pragma[only_bind_into](source),
434+
pragma[only_bind_into](derefSink)) and
435+
invalidPointerToDerefSource(_, pai, source, _) and
436+
isInvalidPointerDerefSink(derefSink, i, _, _) and
437+
i = getASuccessor(derefSink.asInstruction()) and
438+
operation.asInstruction() = i
439+
)
481440
}
482441

483442
/**
@@ -491,8 +450,8 @@ class PathSinkNode extends MergedPathNode, TPathNodeSink {
491450
module FinalConfig implements DataFlow::StateConfigSig {
492451
newtype FlowState =
493452
additional TInitial() or
494-
additional TPointerArith(PointerArithmeticInstruction pai, int delta) {
495-
invalidPointerToDerefSource(_, pai, _, delta)
453+
additional TPointerArith(PointerArithmeticInstruction pai) {
454+
invalidPointerToDerefSource(_, pai, _, _)
496455
}
497456

498457
predicate isSource(DataFlow::Node source, FlowState state) {
@@ -504,10 +463,8 @@ module FinalConfig implements DataFlow::StateConfigSig {
504463
}
505464

506465
predicate isSink(DataFlow::Node sink, FlowState state) {
507-
exists(DataFlow::Node source, PointerArithmeticInstruction pai, int delta |
508-
InvalidPointerToDerefFlow::flow(source, sink) and
509-
invalidPointerToDerefSource(_, pai, source, delta) and
510-
state = TPointerArith(pai, delta)
466+
exists(PointerArithmeticInstruction pai |
467+
derefSinkToOperation(_, pai, sink) and state = TPointerArith(pai)
511468
)
512469
}
513470

@@ -516,70 +473,73 @@ module FinalConfig implements DataFlow::StateConfigSig {
516473
predicate isAdditionalFlowStep(
517474
DataFlow::Node node1, FlowState state1, DataFlow::Node node2, FlowState state2
518475
) {
476+
// A step from the left-hand side of a pointer-arithmetic operation that has been
477+
// identified as creating an out-of-bounds pointer to the result of the pointer-arithmetic
478+
// operation.
519479
exists(
520480
PointerArithmeticInstruction pai, AllocToInvalidPointerFlow::PathNode1 p1,
521-
InvalidPointerToDerefFlow::PathNode p2, int delta
481+
InvalidPointerToDerefFlow::PathNode p2
522482
|
523483
isSinkImpl(pai, node1, _, _) and
524-
invalidPointerToDerefSource(_, pai, node2, delta) and
484+
invalidPointerToDerefSource(_, pai, node2, _) and
525485
node1 = p1.getNode() and
526486
node2 = p2.getNode() and
527487
state1 = TInitial() and
528-
state2 = TPointerArith(pai, delta)
488+
state2 = TPointerArith(pai)
489+
)
490+
or
491+
// A step from an out-of-bounds address to the operation (which is either a `StoreInstruction`
492+
// or a `LoadInstruction`) that dereferences the address.
493+
// This step exists purely for aesthetic reasons: we want the alert to be placed at the operation
494+
// that causes the dereference, and not at the address that flows into the operation.
495+
state1 = state2 and
496+
exists(DataFlow::Node derefSource, PointerArithmeticInstruction pai |
497+
InvalidPointerToDerefFlow::flow(derefSource, node1) and
498+
invalidPointerToDerefSource(_, pai, derefSource, _) and
499+
state1 = TPointerArith(pai) and
500+
derefSinkToOperation(node1, pai, node2)
529501
)
530502
}
531503
}
532504

533505
module FinalFlow = DataFlow::GlobalWithState<FinalConfig>;
534506

535-
query predicate edges(MergedPathNode node1, MergedPathNode node2) {
536-
FinalFlow::PathGraph::edges(node1.asPathNode(), node2.asPathNode())
537-
or
538-
isInvalidPointerDerefSink(node1.asPathNode().getNode(), node2.asSinkNode(), _, _)
539-
}
540-
541-
query predicate nodes(MergedPathNode n, string key, string val) {
542-
FinalFlow::PathGraph::nodes(n.asPathNode(), key, val)
543-
or
544-
key = "semmle.label" and val = n.asSinkNode().toString()
545-
}
546-
547-
query predicate subpaths(
548-
MergedPathNode arg, MergedPathNode par, MergedPathNode ret, MergedPathNode out
549-
) {
550-
FinalFlow::PathGraph::subpaths(arg.asPathNode(), par.asPathNode(), ret.asPathNode(),
551-
out.asPathNode())
552-
}
553-
507+
/**
508+
* Holds if `source` is an allocation that flows into the left-hand side of `pai`, which produces an out-of-bounds
509+
* pointer that flows into an address that is dereferenced by `sink` (which is either a `LoadInstruction` or a
510+
* `StoreInstruction`). The end result is that `sink` writes to an address that is off-by-`delta` from the end of
511+
* the allocation. The string `operation` describes whether the `sink` is a load or a store (which is then used
512+
* to produce the alert message).
513+
*
514+
* Note that multiple `delta`s can exist for a given `(source, pai, sink)` triplet.
515+
*/
554516
predicate hasFlowPath(
555-
MergedPathNode mergedSource, MergedPathNode mergedSink,
556-
InvalidPointerToDerefFlow::PathNode source3, PointerArithmeticInstruction pai, string operation,
557-
int delta
517+
FinalFlow::PathNode source, FinalFlow::PathNode sink, PointerArithmeticInstruction pai,
518+
string operation, int delta
558519
) {
559-
exists(FinalFlow::PathNode source, FinalFlow::PathNode sink3, int delta0, int delta1 |
560-
source = mergedSource.asPathNode() and
561-
FinalFlow::flowPath(source, sink3) and
562-
invalidPointerToDerefSource(source.getNode(), pai, source3.getNode(), delta0) and
563-
sink3.getState() = FinalConfig::TPointerArith(pai, delta0) and
564-
isInvalidPointerDerefSink(sink3.getNode(), mergedSink.asSinkNode(), operation, delta1) and
565-
delta = delta0 + delta1
520+
exists(
521+
DataFlow::Node derefSink, DataFlow::Node derefSource, int deltaDerefSourceAndPai,
522+
int deltaDerefSinkAndDerefAddress
523+
|
524+
FinalFlow::flowPath(source, sink) and
525+
sink.getState() = FinalConfig::TPointerArith(pai) and
526+
invalidPointerToDerefSource(source.getNode(), pai, derefSource, deltaDerefSourceAndPai) and
527+
InvalidPointerToDerefFlow::flow(derefSource, derefSink) and
528+
derefSinkToOperation(derefSink, pai, sink.getNode()) and
529+
isInvalidPointerDerefSink(derefSink, sink.getNode().asInstruction(), operation,
530+
deltaDerefSinkAndDerefAddress) and
531+
delta = deltaDerefSourceAndPai + deltaDerefSinkAndDerefAddress
566532
)
567533
}
568534

569535
from
570-
MergedPathNode source, MergedPathNode sink, int k, string kstr, PointerArithmeticInstruction pai,
571-
string operation, Expr offset, DataFlow::Node n
536+
FinalFlow::PathNode source, FinalFlow::PathNode sink, int k, string kstr,
537+
PointerArithmeticInstruction pai, string operation, Expr offset, DataFlow::Node n
572538
where
573-
k =
574-
min(int k2, int k3, InvalidPointerToDerefFlow::PathNode source3 |
575-
hasFlowPath(source, sink, source3, pai, operation, k3) and
576-
invalidPointerToDerefSource(source.asPathNode().getNode(), pai, source3.getNode(), k2)
577-
|
578-
k2 + k3
579-
) and
539+
k = min(int cand | hasFlowPath(source, sink, pai, operation, cand)) and
580540
offset = pai.getRight().getUnconvertedResultExpression() and
581-
n = source.asPathNode().getNode() and
541+
n = source.getNode() and
582542
if k = 0 then kstr = "" else kstr = " + " + k
583-
select sink, source, sink,
543+
select sink.getNode().asInstruction(), source, sink,
584544
"This " + operation + " might be out of bounds, as the pointer might be equal to $@ + $@" + kstr +
585545
".", n, n.toString(), offset, offset.toString()

0 commit comments

Comments
 (0)