Skip to content

Commit 3dea467

Browse files
authored
Merge pull request github#15047 from MathiasVP/add-puns-for-addresses-of-arguments
C++: Add `PostUpdateNode`s for addresses of outgoing arguments
2 parents 412ea67 + 97f2be9 commit 3dea467

File tree

9 files changed

+317
-207
lines changed

9 files changed

+317
-207
lines changed

cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll

Lines changed: 29 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ private newtype TIRDataFlowNode =
3939
} or
4040
TPostUpdateNodeImpl(Operand operand, int indirectionIndex) {
4141
operand = any(FieldAddress fa).getObjectAddressOperand() and
42-
indirectionIndex = [1 .. Ssa::countIndirectionsForCppType(Ssa::getLanguageType(operand))]
42+
indirectionIndex = [0 .. Ssa::countIndirectionsForCppType(Ssa::getLanguageType(operand))]
4343
or
4444
Ssa::isModifiableByCall(operand, indirectionIndex)
4545
} or
@@ -436,7 +436,12 @@ class Node extends TIRDataFlowNode {
436436
* `x.set(taint())` is a partial definition of `x`, and `transfer(&x, taint())` is
437437
* a partial definition of `&x`).
438438
*/
439-
Expr asPartialDefinition() { result = this.(PartialDefinitionNode).getDefinedExpr() }
439+
Expr asPartialDefinition() {
440+
exists(PartialDefinitionNode pdn | this = pdn |
441+
pdn.getIndirectionIndex() > 0 and
442+
result = pdn.getDefinedExpr()
443+
)
444+
}
440445

441446
/**
442447
* Gets an upper bound on the type of this node.
@@ -563,11 +568,17 @@ private class PostUpdateNodeImpl extends PartialDefinitionNode, TPostUpdateNodeI
563568
Operand getOperand() { result = operand }
564569

565570
/** Gets the indirection index associated with this node. */
566-
int getIndirectionIndex() { result = indirectionIndex }
571+
override int getIndirectionIndex() { result = indirectionIndex }
567572

568573
override Location getLocationImpl() { result = operand.getLocation() }
569574

570-
final override Node getPreUpdateNode() { hasOperandAndIndex(result, operand, indirectionIndex) }
575+
final override Node getPreUpdateNode() {
576+
indirectionIndex > 0 and
577+
hasOperandAndIndex(result, operand, indirectionIndex)
578+
or
579+
indirectionIndex = 0 and
580+
result.asOperand() = operand
581+
}
571582

572583
final override Expr getDefinedExpr() {
573584
result = operand.getDef().getUnconvertedResultExpression()
@@ -836,12 +847,14 @@ class IndirectArgumentOutNode extends PostUpdateNodeImpl {
836847
Function getStaticCallTarget() { result = this.getCallInstruction().getStaticCallTarget() }
837848

838849
override string toStringImpl() {
839-
// This string should be unique enough to be helpful but common enough to
840-
// avoid storing too many different strings.
841-
result = this.getStaticCallTarget().getName() + " output argument"
842-
or
843-
not exists(this.getStaticCallTarget()) and
844-
result = "output argument"
850+
exists(string prefix | if indirectionIndex > 0 then prefix = "" else prefix = "pointer to " |
851+
// This string should be unique enough to be helpful but common enough to
852+
// avoid storing too many different strings.
853+
result = prefix + this.getStaticCallTarget().getName() + " output argument"
854+
or
855+
not exists(this.getStaticCallTarget()) and
856+
result = prefix + "output argument"
857+
)
845858
}
846859
}
847860

@@ -1699,6 +1712,10 @@ abstract class PostUpdateNode extends Node {
16991712
* ```
17001713
*/
17011714
abstract private class PartialDefinitionNode extends PostUpdateNode {
1715+
/** Gets the indirection index of this node. */
1716+
abstract int getIndirectionIndex();
1717+
1718+
/** Gets the expression that is partially defined by this node. */
17021719
abstract Expr getDefinedExpr();
17031720
}
17041721

@@ -1713,6 +1730,8 @@ abstract private class PartialDefinitionNode extends PostUpdateNode {
17131730
* `getVariableAccess()` equal to `x`.
17141731
*/
17151732
class DefinitionByReferenceNode extends IndirectArgumentOutNode {
1733+
DefinitionByReferenceNode() { this.getIndirectionIndex() > 0 }
1734+
17161735
/** Gets the unconverted argument corresponding to this node. */
17171736
Expr getArgument() { result = this.getAddressOperand().getDef().getUnconvertedResultExpression() }
17181737

cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/SsaInternalsCommon.qll

Lines changed: 133 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -417,56 +417,34 @@ class BaseCallVariable extends AbstractBaseSourceVariable, TBaseCallVariable {
417417
override CppType getLanguageType() { result = getResultLanguageType(call) }
418418
}
419419

420-
/**
421-
* Holds if the value pointed to by `operand` can potentially be
422-
* modified be the caller.
423-
*/
424-
predicate isModifiableByCall(ArgumentOperand operand, int indirectionIndex) {
425-
exists(CallInstruction call, int index, CppType type |
426-
indirectionIndex = [1 .. countIndirectionsForCppType(type)] and
427-
type = getLanguageType(operand) and
428-
call.getArgumentOperand(index) = operand and
429-
if index = -1
430-
then
431-
// A qualifier is "modifiable" if:
432-
// 1. the member function is not const specified, or
433-
// 2. the member function is `const` specified, but returns a pointer or reference
434-
// type that is non-const.
435-
//
436-
// To see why this is necessary, consider the following function:
437-
// ```
438-
// struct C {
439-
// void* data_;
440-
// void* data() const { return data; }
441-
// };
442-
// ...
443-
// C c;
444-
// memcpy(c.data(), source, 16)
445-
// ```
446-
// the data pointed to by `c.data_` is potentially modified by the call to `memcpy` even though
447-
// `C::data` has a const specifier. So we further place the restriction that the type returned
448-
// by `call` should not be of the form `const T*` (for some deeply const type `T`).
449-
if call.getStaticCallTarget() instanceof Cpp::ConstMemberFunction
450-
then
451-
exists(PointerOrArrayOrReferenceType resultType |
452-
resultType = call.getResultType() and
453-
not resultType.isDeeplyConstBelow()
454-
)
455-
else any()
456-
else
457-
// An argument is modifiable if it's a non-const pointer or reference type.
458-
isModifiableAt(type, indirectionIndex)
459-
)
460-
}
461-
462-
/**
463-
* Holds if `t` is a pointer or reference type that supports at least `indirectionIndex` number
464-
* of indirections, and the `indirectionIndex` indirection cannot be modfiied by passing a
465-
* value of `t` to a function.
466-
*/
467-
private predicate isModifiableAtImpl(CppType cppType, int indirectionIndex) {
468-
indirectionIndex = [1 .. countIndirectionsForCppType(cppType)] and
469-
(
420+
private module IsModifiableAtImpl {
421+
/**
422+
* Holds if the `indirectionIndex`'th dereference of a value of type
423+
* `cppType` is a type that can be modified (either by modifying the value
424+
* itself or one of its fields if it's a class type).
425+
*
426+
* For example, a value of type `const int* const` cannot be modified
427+
* at any indirection index (because it's a constant pointer to constant
428+
* data), and a value of type `int *const *` is modifiable at indirection index
429+
* 2 only.
430+
*
431+
* A value of type `const S2* s2` where `s2` is
432+
* ```cpp
433+
* struct S { int x; }
434+
* ```
435+
* can be modified at indirection index 1. This is to ensure that we generate
436+
* a `PostUpdateNode` for the argument corresponding to the `s2` parameter in
437+
* an example such as:
438+
* ```cpp
439+
* void set_field(const S2* s2)
440+
* {
441+
* s2->s->x = 42;
442+
* }
443+
* ```
444+
*/
445+
bindingset[cppType, indirectionIndex]
446+
pragma[inline_late]
447+
private predicate impl(CppType cppType, int indirectionIndex) {
470448
exists(Type pointerType, Type base, Type t |
471449
pointerType = t.getUnderlyingType() and
472450
pointerType = any(Indirection ind).getUnderlyingType() and
@@ -480,29 +458,115 @@ private predicate isModifiableAtImpl(CppType cppType, int indirectionIndex) {
480458
// one of the members was modified.
481459
exists(base.stripType().(Cpp::Class).getAField())
482460
)
461+
}
462+
463+
/**
464+
* Holds if `cppType` is modifiable with an indirection index of at least 1.
465+
*
466+
* This predicate factored out into a separate predicate for two reasons:
467+
* - This predicate needs to be recursive because, if a type is modifiable
468+
* at indirection `i`, then it's also modifiable at indirection index `i+1`
469+
* (because the pointer could be completely re-assigned at indirection `i`).
470+
* - We special-case indirection index `0` so that pointer arguments that can
471+
* be modified at some index always have a `PostUpdateNode` at indiretion
472+
* index 0 even though the 0'th indirection can never be modified by a
473+
* callee.
474+
*/
475+
private predicate isModifiableAtImplAtLeast1(CppType cppType, int indirectionIndex) {
476+
indirectionIndex = [1 .. countIndirectionsForCppType(cppType)] and
477+
(
478+
impl(cppType, indirectionIndex)
479+
or
480+
// If the `indirectionIndex`'th dereference of a type can be modified
481+
// then so can the `indirectionIndex + 1`'th dereference.
482+
isModifiableAtImplAtLeast1(cppType, indirectionIndex - 1)
483+
)
484+
}
485+
486+
/**
487+
* Holds if `cppType` is modifiable at indirection index 0.
488+
*
489+
* In reality, the 0'th indirection of a pointer (i.e., the pointer itself)
490+
* can never be modified by a callee, but it is sometimes useful to be able
491+
* to specify the value of the pointer, as its coming out of a function, as
492+
* a source of dataflow since the shared library's reverse-read mechanism
493+
* then ensures that field-flow is accounted for.
494+
*/
495+
private predicate isModifiableAtImplAt0(CppType cppType) { impl(cppType, 0) }
496+
497+
/**
498+
* Holds if `t` is a pointer or reference type that supports at least
499+
* `indirectionIndex` number of indirections, and the `indirectionIndex`
500+
* indirection cannot be modfiied by passing a value of `t` to a function.
501+
*/
502+
private predicate isModifiableAtImpl(CppType cppType, int indirectionIndex) {
503+
isModifiableAtImplAtLeast1(cppType, indirectionIndex)
483504
or
484-
// If the `indirectionIndex`'th dereference of a type can be modified
485-
// then so can the `indirectionIndex + 1`'th dereference.
486-
isModifiableAtImpl(cppType, indirectionIndex - 1)
487-
)
488-
}
505+
indirectionIndex = 0 and
506+
isModifiableAtImplAt0(cppType)
507+
}
489508

490-
/**
491-
* Holds if `t` is a type with at least `indirectionIndex` number of indirections,
492-
* and the `indirectionIndex` indirection can be modified by passing a value of
493-
* type `t` to a function function.
494-
*/
495-
bindingset[indirectionIndex]
496-
predicate isModifiableAt(CppType cppType, int indirectionIndex) {
497-
isModifiableAtImpl(cppType, indirectionIndex)
498-
or
499-
exists(PointerWrapper pw, Type t |
500-
cppType.hasType(t, _) and
501-
t.stripType() = pw and
502-
not pw.pointsToConst()
503-
)
509+
/**
510+
* Holds if `t` is a type with at least `indirectionIndex` number of
511+
* indirections, and the `indirectionIndex` indirection can be modified by
512+
* passing a value of type `t` to a function function.
513+
*/
514+
bindingset[indirectionIndex]
515+
predicate isModifiableAt(CppType cppType, int indirectionIndex) {
516+
isModifiableAtImpl(cppType, indirectionIndex)
517+
or
518+
exists(PointerWrapper pw, Type t |
519+
cppType.hasType(t, _) and
520+
t.stripType() = pw and
521+
not pw.pointsToConst()
522+
)
523+
}
524+
525+
/**
526+
* Holds if the value pointed to by `operand` can potentially be
527+
* modified be the caller.
528+
*/
529+
predicate isModifiableByCall(ArgumentOperand operand, int indirectionIndex) {
530+
exists(CallInstruction call, int index, CppType type |
531+
indirectionIndex = [0 .. countIndirectionsForCppType(type)] and
532+
type = getLanguageType(operand) and
533+
call.getArgumentOperand(index) = operand and
534+
if index = -1
535+
then
536+
// A qualifier is "modifiable" if:
537+
// 1. the member function is not const specified, or
538+
// 2. the member function is `const` specified, but returns a pointer or reference
539+
// type that is non-const.
540+
//
541+
// To see why this is necessary, consider the following function:
542+
// ```
543+
// struct C {
544+
// void* data_;
545+
// void* data() const { return data; }
546+
// };
547+
// ...
548+
// C c;
549+
// memcpy(c.data(), source, 16)
550+
// ```
551+
// the data pointed to by `c.data_` is potentially modified by the call to `memcpy` even though
552+
// `C::data` has a const specifier. So we further place the restriction that the type returned
553+
// by `call` should not be of the form `const T*` (for some deeply const type `T`).
554+
if call.getStaticCallTarget() instanceof Cpp::ConstMemberFunction
555+
then
556+
exists(PointerOrArrayOrReferenceType resultType |
557+
resultType = call.getResultType() and
558+
not resultType.isDeeplyConstBelow()
559+
)
560+
else any()
561+
else
562+
// An argument is modifiable if it's a non-const pointer or reference type.
563+
isModifiableAt(type, indirectionIndex)
564+
)
565+
}
504566
}
505567

568+
import IsModifiableAtImpl
569+
506570
abstract class BaseSourceVariableInstruction extends Instruction {
507571
/** Gets the base source variable accessed by this instruction. */
508572
abstract BaseSourceVariable getBaseSourceVariable();

cpp/ql/src/Critical/DoubleFree.ql

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,11 @@ import semmle.code.cpp.dataflow.new.DataFlow
1616
import FlowAfterFree
1717
import DoubleFree::PathGraph
1818

19-
predicate isFree(DataFlow::Node n, Expr e) { isFree(n, e, _) }
19+
/**
20+
* Holds if `n` is a dataflow node that represents a pointer going into a
21+
* deallocation function, and `e` is the corresponding expression.
22+
*/
23+
predicate isFree(DataFlow::Node n, Expr e) { isFree(_, n, e, _) }
2024

2125
/**
2226
* `dealloc1` is a deallocation expression and `e` is an expression such
@@ -28,7 +32,7 @@ predicate isFree(DataFlow::Node n, Expr e) { isFree(n, e, _) }
2832
*/
2933
bindingset[dealloc1, e]
3034
predicate isExcludeFreePair(DeallocationExpr dealloc1, Expr e) {
31-
exists(DeallocationExpr dealloc2 | isFree(_, e, dealloc2) |
35+
exists(DeallocationExpr dealloc2 | isFree(_, _, e, dealloc2) |
3236
dealloc1.(FunctionCall).getTarget().hasGlobalName("MmFreePagesFromMdl") and
3337
// From https://learn.microsoft.com/en-us/windows-hardware/drivers/ddi/wdm/nf-wdm-mmfreepagesfrommdl:
3438
// "After calling MmFreePagesFromMdl, the caller must also call ExFreePool
@@ -42,7 +46,7 @@ module DoubleFree = FlowFromFree<isFree/2, isExcludeFreePair/2>;
4246
from DoubleFree::PathNode source, DoubleFree::PathNode sink, DeallocationExpr dealloc, Expr e2
4347
where
4448
DoubleFree::flowPath(source, sink) and
45-
isFree(source.getNode(), _, dealloc) and
49+
isFree(source.getNode(), _, _, dealloc) and
4650
isFree(sink.getNode(), e2)
4751
select sink.getNode(), source, sink,
4852
"Memory pointed to by '" + e2.toString() + "' may already have been freed by $@.", dealloc,

cpp/ql/src/Critical/FlowAfterFree.qll

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -50,12 +50,12 @@ predicate strictlyDominates(IRBlock b1, int i1, IRBlock b2, int i2) {
5050
module FlowFromFree<isSinkSig/2 isASink, isExcludedSig/2 isExcluded> {
5151
module FlowFromFreeConfig implements DataFlow::StateConfigSig {
5252
class FlowState instanceof Expr {
53-
FlowState() { isFree(_, this, _) }
53+
FlowState() { isFree(_, _, this, _) }
5454

5555
string toString() { result = super.toString() }
5656
}
5757

58-
predicate isSource(DataFlow::Node node, FlowState state) { isFree(node, state, _) }
58+
predicate isSource(DataFlow::Node node, FlowState state) { isFree(node, _, state, _) }
5959

6060
pragma[inline]
6161
predicate isSink(DataFlow::Node sink, FlowState state) {
@@ -64,7 +64,7 @@ module FlowFromFree<isSinkSig/2 isASink, isExcludedSig/2 isExcluded> {
6464
DeallocationExpr dealloc
6565
|
6666
isASink(sink, e) and
67-
isFree(source, state, dealloc) and
67+
isFree(source, _, state, dealloc) and
6868
e != state and
6969
source.hasIndexInBlock(b1, i1) and
7070
sink.hasIndexInBlock(b2, i2) and
@@ -94,14 +94,17 @@ module FlowFromFree<isSinkSig/2 isASink, isExcludedSig/2 isExcluded> {
9494
}
9595

9696
/**
97-
* Holds if `n` is a dataflow node such that `n.asExpr() = e` and `e`
98-
* is being freed by a deallocation expression `dealloc`.
97+
* Holds if `outgoing` is a dataflow node that represents the pointer passed to
98+
* `dealloc` after the call returns (i.e., the post-update node associated with
99+
* the argument to `dealloc`), and `incoming` is the corresponding argument
100+
* node going into `dealloc` (i.e., the pre-update node of `outgoing`).
99101
*/
100-
predicate isFree(DataFlow::Node n, Expr e, DeallocationExpr dealloc) {
102+
predicate isFree(DataFlow::Node outgoing, DataFlow::Node incoming, Expr e, DeallocationExpr dealloc) {
101103
exists(Expr conv |
102104
e = conv.getUnconverted() and
103105
conv = dealloc.getFreedExpr().getFullyConverted() and
104-
conv = n.asConvertedExpr()
106+
incoming = outgoing.(DataFlow::PostUpdateNode).getPreUpdateNode() and
107+
conv = incoming.asConvertedExpr()
105108
) and
106109
// Ignore realloc functions
107110
not exists(dealloc.(FunctionCall).getTarget().(AllocationFunction).getReallocPtrArg())

cpp/ql/src/Critical/UseAfterFree.ql

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ private predicate externalCallNeverDereferences(FormattingFunctionCall call, int
3030
}
3131

3232
predicate isUse0(Expr e) {
33-
not isFree(_, e, _) and
33+
not isFree(_, _, e, _) and
3434
(
3535
e = any(PointerDereferenceExpr pde).getOperand()
3636
or
@@ -170,6 +170,6 @@ module UseAfterFree = FlowFromFree<isUse/2, isExcludeFreeUsePair/2>;
170170
from UseAfterFree::PathNode source, UseAfterFree::PathNode sink, DeallocationExpr dealloc
171171
where
172172
UseAfterFree::flowPath(source, sink) and
173-
isFree(source.getNode(), _, dealloc)
173+
isFree(source.getNode(), _, _, dealloc)
174174
select sink.getNode(), source, sink, "Memory may have been previously freed by $@.", dealloc,
175175
dealloc.toString()

0 commit comments

Comments
 (0)