Skip to content

Commit 1f9e2c7

Browse files
authored
Merge pull request #14928 from MathiasVP/surprising-lifetimes-c_str
C++: Add a new query for calling `c_str` on temporary objects
2 parents 399872b + 351caac commit 1f9e2c7

File tree

13 files changed

+473
-9
lines changed

13 files changed

+473
-9
lines changed

cpp/ql/lib/semmle/code/cpp/models/implementations/StdContainer.qll

Lines changed: 68 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -123,14 +123,25 @@ private class StdSequenceContainerData extends TaintFunction {
123123
/**
124124
* The standard container functions `push_back` and `push_front`.
125125
*/
126-
private class StdSequenceContainerPush extends TaintFunction {
126+
class StdSequenceContainerPush extends MemberFunction {
127127
StdSequenceContainerPush() {
128128
this.getClassAndName("push_back") instanceof Vector or
129129
this.getClassAndName(["push_back", "push_front"]) instanceof Deque or
130130
this.getClassAndName("push_front") instanceof ForwardList or
131131
this.getClassAndName(["push_back", "push_front"]) instanceof List
132132
}
133133

134+
/**
135+
* Gets the index of a parameter to this function that is a reference to the
136+
* value type of the container.
137+
*/
138+
int getAValueTypeParameterIndex() {
139+
this.getParameter(result).getUnspecifiedType().(ReferenceType).getBaseType() =
140+
this.getDeclaringType().getTemplateArgument(0).(Type).getUnspecifiedType() // i.e. the `T` of this `std::vector<T>`
141+
}
142+
}
143+
144+
private class StdSequenceContainerPushModel extends StdSequenceContainerPush, TaintFunction {
134145
override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
135146
// flow from parameter to qualifier
136147
input.isParameterDeref(0) and
@@ -160,7 +171,7 @@ private class StdSequenceContainerFrontBack extends TaintFunction {
160171
/**
161172
* The standard container functions `insert` and `insert_after`.
162173
*/
163-
private class StdSequenceContainerInsert extends TaintFunction {
174+
class StdSequenceContainerInsert extends MemberFunction {
164175
StdSequenceContainerInsert() {
165176
this.getClassAndName("insert") instanceof Deque or
166177
this.getClassAndName("insert") instanceof List or
@@ -181,7 +192,9 @@ private class StdSequenceContainerInsert extends TaintFunction {
181192
* Gets the index of a parameter to this function that is an iterator.
182193
*/
183194
int getAnIteratorParameterIndex() { this.getParameter(result).getType() instanceof Iterator }
195+
}
184196

197+
private class StdSequenceContainerInsertModel extends StdSequenceContainerInsert, TaintFunction {
185198
override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
186199
// flow from parameter to container itself (qualifier) and return value
187200
(
@@ -253,11 +266,28 @@ private class StdSequenceContainerAt extends TaintFunction {
253266
}
254267

255268
/**
256-
* The standard vector `emplace` function.
269+
* The standard `emplace` function.
257270
*/
258-
class StdVectorEmplace extends TaintFunction {
259-
StdVectorEmplace() { this.getClassAndName("emplace") instanceof Vector }
271+
class StdSequenceEmplace extends MemberFunction {
272+
StdSequenceEmplace() {
273+
this.getClassAndName("emplace") instanceof Vector
274+
or
275+
this.getClassAndName("emplace") instanceof List
276+
or
277+
this.getClassAndName("emplace") instanceof Deque
278+
}
279+
280+
/**
281+
* Gets the index of a parameter to this function that is a reference to the
282+
* value type of the container.
283+
*/
284+
int getAValueTypeParameterIndex() {
285+
this.getParameter(result).getUnspecifiedType().(ReferenceType).getBaseType() =
286+
this.getDeclaringType().getTemplateArgument(0).(Type).getUnspecifiedType() // i.e. the `T` of this `std::vector<T>`
287+
}
288+
}
260289

290+
private class StdSequenceEmplaceModel extends StdSequenceEmplace, TaintFunction {
261291
override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
262292
// flow from any parameter except the position iterator to qualifier and return value
263293
// (here we assume taint flow from any constructor parameter to the constructed object)
@@ -269,16 +299,47 @@ class StdVectorEmplace extends TaintFunction {
269299
}
270300
}
271301

302+
/**
303+
* The standard vector `emplace` function.
304+
*/
305+
class StdVectorEmplace extends StdSequenceEmplace {
306+
StdVectorEmplace() { this.getDeclaringType() instanceof Vector }
307+
}
308+
272309
/**
273310
* The standard vector `emplace_back` function.
274311
*/
275-
class StdVectorEmplaceBack extends TaintFunction {
276-
StdVectorEmplaceBack() { this.getClassAndName("emplace_back") instanceof Vector }
312+
class StdSequenceEmplaceBack extends MemberFunction {
313+
StdSequenceEmplaceBack() {
314+
this.getClassAndName("emplace_back") instanceof Vector
315+
or
316+
this.getClassAndName("emplace_back") instanceof List
317+
or
318+
this.getClassAndName("emplace_back") instanceof Deque
319+
}
320+
321+
/**
322+
* Gets the index of a parameter to this function that is a reference to the
323+
* value type of the container.
324+
*/
325+
int getAValueTypeParameterIndex() {
326+
this.getParameter(result).getUnspecifiedType().(ReferenceType).getBaseType() =
327+
this.getDeclaringType().getTemplateArgument(0).(Type).getUnspecifiedType() // i.e. the `T` of this `std::vector<T>`
328+
}
329+
}
277330

331+
private class StdSequenceEmplaceBackModel extends StdSequenceEmplaceBack, TaintFunction {
278332
override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
279333
// flow from any parameter to qualifier
280334
// (here we assume taint flow from any constructor parameter to the constructed object)
281335
input.isParameterDeref([0 .. this.getNumberOfParameters() - 1]) and
282336
output.isQualifierObject()
283337
}
284338
}
339+
340+
/**
341+
* The standard vector `emplace_back` function.
342+
*/
343+
class StdVectorEmplaceBack extends StdSequenceEmplaceBack {
344+
StdVectorEmplaceBack() { this.getDeclaringType() instanceof Vector }
345+
}

cpp/ql/lib/semmle/code/cpp/models/implementations/StdString.qll

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -99,9 +99,11 @@ private class StdStringConstructor extends Constructor, StdStringTaintFunction {
9999
/**
100100
* The `std::string` function `c_str`.
101101
*/
102-
private class StdStringCStr extends StdStringTaintFunction {
102+
class StdStringCStr extends MemberFunction {
103103
StdStringCStr() { this.getClassAndName("c_str") instanceof StdBasicString }
104+
}
104105

106+
private class StdStringCStrModel extends StdStringCStr, StdStringTaintFunction {
105107
override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
106108
// flow from string itself (qualifier) to return value
107109
input.isQualifierObject() and
@@ -112,9 +114,11 @@ private class StdStringCStr extends StdStringTaintFunction {
112114
/**
113115
* The `std::string` function `data`.
114116
*/
115-
private class StdStringData extends StdStringTaintFunction {
117+
class StdStringData extends MemberFunction {
116118
StdStringData() { this.getClassAndName("data") instanceof StdBasicString }
119+
}
117120

121+
private class StdStringDataModel extends StdStringData, StdStringTaintFunction {
118122
override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
119123
// flow from string itself (qualifier) to return value
120124
input.isQualifierObject() and
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
<!DOCTYPE qhelp PUBLIC
2+
"-//Semmle//qhelp//EN"
3+
"qhelp.dtd">
4+
<qhelp>
5+
6+
<overview>
7+
<p>Calling <code>c_str</code> on a <code>std::string</code> object returns a pointer to the underlying character array.
8+
When the <code>std::string</code> object is destroyed, the pointer returned by <code>c_str</code> is no
9+
longer valid. If the pointer is used after the <code>std::string</code> object is destroyed, then the behavior is undefined.
10+
</p>
11+
</overview>
12+
13+
<recommendation>
14+
<p>
15+
Ensure that the pointer returned by <code>c_str</code> does not outlive the underlying <code>std::string</code> object.
16+
</p>
17+
</recommendation>
18+
19+
<example>
20+
<p>
21+
The following example concatenates two <code>std::string</code> objects, and then converts the resulting string to a
22+
C string using <code>c_str</code> so that it can be passed to the <code>work</code> function.
23+
24+
However, the underlying <code>std::string</code> object that represents the concatenated string is destroyed as soon as the call
25+
to <code>c_str</code> returns. This means that <code>work</code> is given a pointer to invalid memory.
26+
</p>
27+
28+
<sample src="UseOfStringAfterLifetimeEndsBad.cpp" />
29+
30+
<p>
31+
The following example fixes the above code by ensuring that the pointer returned by the call to <code>c_str</code> does
32+
not outlive the underlying <code>std::string</code> objects. This ensures that the pointer passed to <code>work</code>
33+
points to valid memory.
34+
</p>
35+
36+
<sample src="UseOfStringAfterLifetimeEndsGood.cpp" />
37+
38+
</example>
39+
<references>
40+
41+
<li><a href="https://wiki.sei.cmu.edu/confluence/display/cplusplus/MEM50-CPP.+Do+not+access+freed+memory">MEM50-CPP. Do not access freed memory</a>.</li>
42+
43+
</references>
44+
</qhelp>
Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
/**
2+
* @name Use of string after lifetime ends
3+
* @description If the value of a call to 'c_str' outlives the underlying object it may lead to unexpected behavior.
4+
* @kind problem
5+
* @precision high
6+
* @id cpp/use-of-string-after-lifetime-ends
7+
* @problem.severity warning
8+
* @security-severity 8.8
9+
* @tags reliability
10+
* security
11+
* external/cwe/cwe-416
12+
* external/cwe/cwe-664
13+
*/
14+
15+
import cpp
16+
import semmle.code.cpp.models.implementations.StdString
17+
import semmle.code.cpp.models.implementations.StdContainer
18+
19+
/**
20+
* Holds if `e` will be consumed by its parent as a glvalue and does not have
21+
* an lvalue-to-rvalue conversion. This means that it will be materialized into
22+
* a temporary object.
23+
*/
24+
predicate isTemporary(Expr e) {
25+
e instanceof TemporaryObjectExpr
26+
or
27+
e.isPRValueCategory() and
28+
e.getUnspecifiedType() instanceof Class and
29+
not e.hasLValueToRValueConversion()
30+
}
31+
32+
/** Holds if `e` is written to a container. */
33+
predicate isStoredInContainer(Expr e) {
34+
exists(StdSequenceContainerInsert insert, Call call, int index |
35+
call = insert.getACallToThisFunction() and
36+
index = insert.getAValueTypeParameterIndex() and
37+
call.getArgument(index) = e
38+
)
39+
or
40+
exists(StdSequenceContainerPush push, Call call, int index |
41+
call = push.getACallToThisFunction() and
42+
index = push.getAValueTypeParameterIndex() and
43+
call.getArgument(index) = e
44+
)
45+
or
46+
exists(StdSequenceEmplace emplace, Call call, int index |
47+
call = emplace.getACallToThisFunction() and
48+
index = emplace.getAValueTypeParameterIndex() and
49+
call.getArgument(index) = e
50+
)
51+
or
52+
exists(StdSequenceEmplaceBack emplaceBack, Call call, int index |
53+
call = emplaceBack.getACallToThisFunction() and
54+
index = emplaceBack.getAValueTypeParameterIndex() and
55+
call.getArgument(index) = e
56+
)
57+
}
58+
59+
/**
60+
* Holds if the value of `e` outlives the enclosing full expression. For
61+
* example, because the value is stored in a local variable.
62+
*/
63+
predicate outlivesFullExpr(Expr e) {
64+
any(Assignment assign).getRValue() = e
65+
or
66+
any(Variable v).getInitializer().getExpr() = e
67+
or
68+
any(ReturnStmt ret).getExpr() = e
69+
or
70+
exists(ConditionalExpr cond |
71+
outlivesFullExpr(cond) and
72+
[cond.getThen(), cond.getElse()] = e
73+
)
74+
or
75+
exists(BinaryOperation bin |
76+
outlivesFullExpr(bin) and
77+
bin.getAnOperand() = e
78+
)
79+
or
80+
exists(ClassAggregateLiteral aggr |
81+
outlivesFullExpr(aggr) and
82+
aggr.getAFieldExpr(_) = e
83+
)
84+
or
85+
exists(ArrayAggregateLiteral aggr |
86+
outlivesFullExpr(aggr) and
87+
aggr.getAnElementExpr(_) = e
88+
)
89+
or
90+
isStoredInContainer(e)
91+
}
92+
93+
from Call c
94+
where
95+
outlivesFullExpr(c) and
96+
not c.isFromUninstantiatedTemplate(_) and
97+
(c.getTarget() instanceof StdStringCStr or c.getTarget() instanceof StdStringData) and
98+
isTemporary(c.getQualifier().getFullyConverted())
99+
select c,
100+
"The underlying string object is destroyed after the call to '" + c.getTarget() + "' returns."
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
#include <string>
2+
void work(const char*);
3+
4+
// BAD: the concatenated string is deallocated when `c_str` returns. So `work`
5+
// is given a pointer to invalid memory.
6+
void work_with_combined_string_bad(std::string s1, std::string s2) {
7+
const char* combined_string = (s1 + s2).c_str();
8+
work(combined_string);
9+
}
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
#include <string>
2+
void work(const char*);
3+
4+
// GOOD: the concatenated string outlives the call to `work`. So the pointer
5+
// obtainted from `c_str` is valid.
6+
void work_with_combined_string_good(std::string s1, std::string s2) {
7+
auto combined_string = s1 + s2;
8+
work(combined_string.c_str());
9+
}
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
---
2+
category: newQuery
3+
---
4+
* Added a new query, `cpp/use-of-string-after-lifetime-ends`, to detect calls to `c_str` on strings that will be destroyed immediately.

0 commit comments

Comments
 (0)