Skip to content

Commit 8ce4bbe

Browse files
authored
Merge pull request github#14867 from MathiasVP/reduce-duplication-from-operators
C++: Reduce duplication from crement operations
2 parents 70ff59e + 2908acf commit 8ce4bbe

File tree

9 files changed

+212
-34
lines changed

9 files changed

+212
-34
lines changed
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
---
2+
category: minorAnalysis
3+
---
4+
* Added a new predicate `Node.asDefinition` on `DataFlow::Node`s for selecting the dataflow node corresponding to a particular definition.

cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll

Lines changed: 168 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -260,6 +260,71 @@ class Node extends TIRDataFlowNode {
260260
*/
261261
Expr asDefiningArgument() { result = this.asDefiningArgument(_) }
262262

263+
/**
264+
* Gets the definition associated with this node, if any.
265+
*
266+
* For example, consider the following example
267+
* ```cpp
268+
* int x = 42; // 1
269+
* x = 34; // 2
270+
* ++x; // 3
271+
* x++; // 4
272+
* x += 1; // 5
273+
* int y = x += 2; // 6
274+
* ```
275+
* - For (1) the result is `42`.
276+
* - For (2) the result is `x = 34`.
277+
* - For (3) the result is `++x`.
278+
* - For (4) the result is `x++`.
279+
* - For (5) the result is `x += 1`.
280+
* - For (6) there are two results:
281+
* - For the definition generated by `x += 2` the result is `x += 2`
282+
* - For the definition generated by `int y = ...` the result is
283+
* also `x += 2`.
284+
*
285+
* For assignments, `node.asDefinition()` and `node.asExpr()` will both exist
286+
* for the same dataflow node. However, for expression such as `x++` that
287+
* both write to `x` and read the current value of `x`, `node.asDefinition()`
288+
* will give the node corresponding to the value after the increment, and
289+
* `node.asExpr()` will give the node corresponding to the value before the
290+
* increment. For an example of this, consider the following:
291+
*
292+
* ```cpp
293+
* sink(x++);
294+
* ```
295+
* in the above program, there will not be flow from a node `n` such that
296+
* `n.asDefinition() instanceof IncrementOperation` to the argument of `sink`
297+
* since the value passed to `sink` is the value before to the increment.
298+
* However, there will be dataflow from a node `n` such that
299+
* `n.asExpr() instanceof IncrementOperation` since the result of evaluating
300+
* the expression `x++` is passed to `sink`.
301+
*/
302+
Expr asDefinition() {
303+
exists(StoreInstruction store |
304+
store = this.asInstruction() and
305+
result = asDefinitionImpl(store)
306+
)
307+
}
308+
309+
/**
310+
* Gets the indirect definition at a given indirection corresponding to this
311+
* node, if any.
312+
*
313+
* See the comments on `Node.asDefinition` for examples.
314+
*/
315+
Expr asIndirectDefinition(int indirectionIndex) {
316+
exists(StoreInstruction store |
317+
this.(IndirectInstruction).hasInstructionAndIndirectionIndex(store, indirectionIndex) and
318+
result = asDefinitionImpl(store)
319+
)
320+
}
321+
322+
/**
323+
* Gets the indirect definition at some indirection corresponding to this
324+
* node, if any.
325+
*/
326+
Expr asIndirectDefinition() { result = this.asIndirectDefinition(_) }
327+
263328
/**
264329
* Gets the argument that defines this `DefinitionByReferenceNode`, if any.
265330
*
@@ -1142,30 +1207,14 @@ private module GetConvertedResultExpression {
11421207
}
11431208

11441209
private Expr getConvertedResultExpressionImpl0(Instruction instr) {
1145-
// For an expression such as `i += 2` we pretend that the generated
1146-
// `StoreInstruction` contains the result of the expression even though
1147-
// this isn't totally aligned with the C/C++ standard.
1148-
exists(TranslatedAssignOperation tao |
1149-
result = tao.getExpr() and
1150-
instr = tao.getInstruction(any(AssignmentStoreTag tag))
1151-
)
1152-
or
1153-
// Similarly for `i++` and `++i` we pretend that the generated
1154-
// `StoreInstruction` is contains the result of the expression even though
1155-
// this isn't totally aligned with the C/C++ standard.
1156-
exists(TranslatedCrementOperation tco |
1157-
result = tco.getExpr() and
1158-
instr = tco.getInstruction(any(CrementStoreTag tag))
1159-
)
1160-
or
11611210
// IR construction inserts an additional cast to a `size_t` on the extent
11621211
// of a `new[]` expression. The resulting `ConvertInstruction` doesn't have
11631212
// a result for `getConvertedResultExpression`. We remap this here so that
11641213
// this `ConvertInstruction` maps to the result of the expression that
11651214
// represents the extent.
11661215
exists(TranslatedNonConstantAllocationSize tas |
11671216
result = tas.getExtent().getExpr() and
1168-
instr = tas.getInstruction(any(AllocationExtentConvertTag tag))
1217+
instr = tas.getInstruction(AllocationExtentConvertTag())
11691218
)
11701219
or
11711220
// There's no instruction that returns `ParenthesisExpr`, but some queries
@@ -1174,6 +1223,39 @@ private module GetConvertedResultExpression {
11741223
result = ttc.getExpr().(ParenthesisExpr) and
11751224
instr = ttc.getResult()
11761225
)
1226+
or
1227+
// Certain expressions generate `CopyValueInstruction`s only when they
1228+
// are needed. Examples of this include crement operations and compound
1229+
// assignment operations. For example:
1230+
// ```cpp
1231+
// int x = ...
1232+
// int y = x++;
1233+
// ```
1234+
// this generate IR like:
1235+
// ```
1236+
// r1(glval<int>) = VariableAddress[x] :
1237+
// r2(int) = Constant[0] :
1238+
// m3(int) = Store[x] : &:r1, r2
1239+
// r4(glval<int>) = VariableAddress[y] :
1240+
// r5(glval<int>) = VariableAddress[x] :
1241+
// r6(int) = Load[x] : &:r5, m3
1242+
// r7(int) = Constant[1] :
1243+
// r8(int) = Add : r6, r7
1244+
// m9(int) = Store[x] : &:r5, r8
1245+
// r11(int) = CopyValue : r6
1246+
// m12(int) = Store[y] : &:r4, r11
1247+
// ```
1248+
// When the `CopyValueInstruction` is not generated there is no instruction
1249+
// whose `getConvertedResultExpression` maps back to the expression. When
1250+
// such an instruction doesn't exist it means that the old value is not
1251+
// needed, and in that case the only value that will propagate forward in
1252+
// the program is the value that's been updated. So in those cases we just
1253+
// use the result of `node.asDefinition()` as the result of `node.asExpr()`.
1254+
exists(TranslatedCoreExpr tco |
1255+
tco.getInstruction(_) = instr and
1256+
tco.producesExprResult() and
1257+
result = asDefinitionImpl0(instr)
1258+
)
11771259
}
11781260

11791261
private Expr getConvertedResultExpressionImpl(Instruction instr) {
@@ -1182,6 +1264,75 @@ private module GetConvertedResultExpression {
11821264
not exists(getConvertedResultExpressionImpl0(instr)) and
11831265
result = instr.getConvertedResultExpression()
11841266
}
1267+
1268+
/**
1269+
* Gets the result for `node.asDefinition()` (when `node` is the instruction
1270+
* node that wraps `store`) in the cases where `store.getAst()` should not be
1271+
* used to define the result of `node.asDefinition()`.
1272+
*/
1273+
private Expr asDefinitionImpl0(StoreInstruction store) {
1274+
// For an expression such as `i += 2` we pretend that the generated
1275+
// `StoreInstruction` contains the result of the expression even though
1276+
// this isn't totally aligned with the C/C++ standard.
1277+
exists(TranslatedAssignOperation tao |
1278+
store = tao.getInstruction(AssignmentStoreTag()) and
1279+
result = tao.getExpr()
1280+
)
1281+
or
1282+
// Similarly for `i++` and `++i` we pretend that the generated
1283+
// `StoreInstruction` is contains the result of the expression even though
1284+
// this isn't totally aligned with the C/C++ standard.
1285+
exists(TranslatedCrementOperation tco |
1286+
store = tco.getInstruction(CrementStoreTag()) and
1287+
result = tco.getExpr()
1288+
)
1289+
}
1290+
1291+
/**
1292+
* Holds if the expression returned by `store.getAst()` should not be
1293+
* returned as the result of `node.asDefinition()` when `node` is the
1294+
* instruction node that wraps `store`.
1295+
*/
1296+
private predicate excludeAsDefinitionResult(StoreInstruction store) {
1297+
// Exclude the store to the temporary generated by a ternary expression.
1298+
exists(TranslatedConditionalExpr tce |
1299+
store = tce.getInstruction(ConditionValueFalseStoreTag())
1300+
or
1301+
store = tce.getInstruction(ConditionValueTrueStoreTag())
1302+
)
1303+
}
1304+
1305+
/**
1306+
* Gets the expression that represents the result of `StoreInstruction` for
1307+
* dataflow purposes.
1308+
*
1309+
* For example, consider the following example
1310+
* ```cpp
1311+
* int x = 42; // 1
1312+
* x = 34; // 2
1313+
* ++x; // 3
1314+
* x++; // 4
1315+
* x += 1; // 5
1316+
* int y = x += 2; // 6
1317+
* ```
1318+
* For (1) the result is `42`.
1319+
* For (2) the result is `x = 34`.
1320+
* For (3) the result is `++x`.
1321+
* For (4) the result is `x++`.
1322+
* For (5) the result is `x += 1`.
1323+
* For (6) there are two results:
1324+
* - For the `StoreInstruction` generated by `x += 2` the result
1325+
* is `x += 2`
1326+
* - For the `StoreInstruction` generated by `int y = ...` the result
1327+
* is also `x += 2`
1328+
*/
1329+
Expr asDefinitionImpl(StoreInstruction store) {
1330+
not exists(asDefinitionImpl0(store)) and
1331+
not excludeAsDefinitionResult(store) and
1332+
result = store.getAst().(Expr).getUnconverted()
1333+
or
1334+
result = asDefinitionImpl0(store)
1335+
}
11851336
}
11861337

11871338
private import GetConvertedResultExpression

cpp/ql/src/Likely Bugs/Leap Year/LeapYear.qll

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -345,6 +345,8 @@ private module PossibleYearArithmeticOperationCheckConfig implements DataFlow::C
345345
)
346346
}
347347

348+
predicate isBarrierIn(DataFlow::Node node) { isSource(node) }
349+
348350
predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) {
349351
// flow from anything on the RHS of an assignment to a time/date structure to that
350352
// assignment.

cpp/ql/src/Security/CWE/CWE-190/IntegerOverflowTainted.ql

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,10 +35,10 @@ predicate isSource(FS::FlowSource source, string sourceType) { sourceType = sour
3535

3636
predicate isSink(DataFlow::Node sink, string kind) {
3737
exists(Expr use |
38-
use = sink.asExpr() and
3938
not use.getUnspecifiedType() instanceof PointerType and
4039
outOfBoundsExpr(use, kind) and
41-
not inSystemMacroExpansion(use)
40+
not inSystemMacroExpansion(use) and
41+
use = sink.asExpr()
4242
)
4343
}
4444

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,5 @@
11
| test.cpp:173:29:173:51 | ... & ... | An arithmetic operation $@ that uses a constant value of 365 ends up modifying this date/time, without considering leap year scenarios. | test.cpp:170:2:170:47 | ... += ... | ... += ... |
2-
| test.cpp:173:29:173:51 | ... & ... | An arithmetic operation $@ that uses a constant value of 365 ends up modifying this date/time, without considering leap year scenarios. | test.cpp:170:16:170:47 | ... * ... | ... * ... |
32
| test.cpp:174:30:174:45 | ... >> ... | An arithmetic operation $@ that uses a constant value of 365 ends up modifying this date/time, without considering leap year scenarios. | test.cpp:170:2:170:47 | ... += ... | ... += ... |
4-
| test.cpp:174:30:174:45 | ... >> ... | An arithmetic operation $@ that uses a constant value of 365 ends up modifying this date/time, without considering leap year scenarios. | test.cpp:170:16:170:47 | ... * ... | ... * ... |
53
| test.cpp:193:15:193:24 | ... / ... | An arithmetic operation $@ that uses a constant value of 365 ends up modifying this date/time, without considering leap year scenarios. | test.cpp:193:15:193:24 | ... / ... | ... / ... |
64
| test.cpp:217:29:217:51 | ... & ... | An arithmetic operation $@ that uses a constant value of 365 ends up modifying this date/time, without considering leap year scenarios. | test.cpp:214:2:214:47 | ... += ... | ... += ... |
7-
| test.cpp:217:29:217:51 | ... & ... | An arithmetic operation $@ that uses a constant value of 365 ends up modifying this date/time, without considering leap year scenarios. | test.cpp:214:16:214:47 | ... * ... | ... * ... |
85
| test.cpp:218:30:218:45 | ... >> ... | An arithmetic operation $@ that uses a constant value of 365 ends up modifying this date/time, without considering leap year scenarios. | test.cpp:214:2:214:47 | ... += ... | ... += ... |
9-
| test.cpp:218:30:218:45 | ... >> ... | An arithmetic operation $@ that uses a constant value of 365 ends up modifying this date/time, without considering leap year scenarios. | test.cpp:214:16:214:47 | ... * ... | ... * ... |

cpp/ql/test/query-tests/Security/CWE/CWE-119/semmle/tests/OverflowBuffer.expected

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,8 @@
4747
| tests.cpp:546:6:546:10 | call to fread | This 'fread' operation may access 400 bytes but the $@ is only 100 bytes. | tests.cpp:532:7:532:16 | charBuffer | destination buffer |
4848
| tests.cpp:569:6:569:15 | access to array | This array indexing operation accesses a negative index -1 on the $@. | tests.cpp:565:7:565:12 | buffer | array |
4949
| tests.cpp:577:7:577:13 | access to array | This array indexing operation accesses a negative index -1 on the $@. | tests.cpp:565:7:565:12 | buffer | array |
50+
| tests.cpp:637:6:637:15 | access to array | This array indexing operation accesses a negative index -1 on the $@. | tests.cpp:633:7:633:12 | buffer | array |
51+
| tests.cpp:645:7:645:13 | access to array | This array indexing operation accesses a negative index -1 on the $@. | tests.cpp:633:7:633:12 | buffer | array |
5052
| tests_restrict.c:12:2:12:7 | call to memcpy | This 'memcpy' operation accesses 2 bytes but the $@ is only 1 byte. | tests_restrict.c:7:6:7:13 | smallbuf | source buffer |
5153
| unions.cpp:26:2:26:7 | call to memset | This 'memset' operation accesses 200 bytes but the $@ is only 100 bytes. | unions.cpp:21:10:21:11 | mu | destination buffer |
5254
| unions.cpp:30:2:30:7 | call to memset | This 'memset' operation accesses 200 bytes but the $@ is only 100 bytes. | unions.cpp:15:7:15:11 | small | destination buffer |

cpp/ql/test/query-tests/Security/CWE/CWE-119/semmle/tests/UnboundedWrite.expected

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,17 @@
11
edges
22
| main.cpp:6:27:6:30 | argv indirection | main.cpp:10:20:10:23 | argv indirection |
3-
| main.cpp:10:20:10:23 | argv indirection | tests.cpp:631:32:631:35 | argv indirection |
3+
| main.cpp:10:20:10:23 | argv indirection | tests.cpp:657:32:657:35 | argv indirection |
44
| tests.cpp:613:19:613:24 | source indirection | tests.cpp:615:17:615:22 | source indirection |
55
| tests.cpp:622:19:622:24 | source indirection | tests.cpp:625:2:625:16 | ... = ... indirection |
66
| tests.cpp:625:2:625:16 | ... = ... indirection | tests.cpp:625:4:625:7 | s indirection [post update] [home indirection] |
77
| tests.cpp:625:4:625:7 | s indirection [post update] [home indirection] | tests.cpp:628:14:628:14 | s indirection [home indirection] |
88
| tests.cpp:628:14:628:14 | s indirection [home indirection] | tests.cpp:628:14:628:19 | home indirection |
99
| tests.cpp:628:14:628:14 | s indirection [home indirection] | tests.cpp:628:16:628:19 | home indirection |
1010
| tests.cpp:628:16:628:19 | home indirection | tests.cpp:628:14:628:19 | home indirection |
11-
| tests.cpp:631:32:631:35 | argv indirection | tests.cpp:656:9:656:15 | access to array indirection |
12-
| tests.cpp:631:32:631:35 | argv indirection | tests.cpp:657:9:657:15 | access to array indirection |
13-
| tests.cpp:656:9:656:15 | access to array indirection | tests.cpp:613:19:613:24 | source indirection |
14-
| tests.cpp:657:9:657:15 | access to array indirection | tests.cpp:622:19:622:24 | source indirection |
11+
| tests.cpp:657:32:657:35 | argv indirection | tests.cpp:682:9:682:15 | access to array indirection |
12+
| tests.cpp:657:32:657:35 | argv indirection | tests.cpp:683:9:683:15 | access to array indirection |
13+
| tests.cpp:682:9:682:15 | access to array indirection | tests.cpp:613:19:613:24 | source indirection |
14+
| tests.cpp:683:9:683:15 | access to array indirection | tests.cpp:622:19:622:24 | source indirection |
1515
nodes
1616
| main.cpp:6:27:6:30 | argv indirection | semmle.label | argv indirection |
1717
| main.cpp:10:20:10:23 | argv indirection | semmle.label | argv indirection |
@@ -23,9 +23,9 @@ nodes
2323
| tests.cpp:628:14:628:14 | s indirection [home indirection] | semmle.label | s indirection [home indirection] |
2424
| tests.cpp:628:14:628:19 | home indirection | semmle.label | home indirection |
2525
| tests.cpp:628:16:628:19 | home indirection | semmle.label | home indirection |
26-
| tests.cpp:631:32:631:35 | argv indirection | semmle.label | argv indirection |
27-
| tests.cpp:656:9:656:15 | access to array indirection | semmle.label | access to array indirection |
28-
| tests.cpp:657:9:657:15 | access to array indirection | semmle.label | access to array indirection |
26+
| tests.cpp:657:32:657:35 | argv indirection | semmle.label | argv indirection |
27+
| tests.cpp:682:9:682:15 | access to array indirection | semmle.label | access to array indirection |
28+
| tests.cpp:683:9:683:15 | access to array indirection | semmle.label | access to array indirection |
2929
subpaths
3030
#select
3131
| tests.cpp:615:2:615:7 | call to strcpy | main.cpp:6:27:6:30 | argv indirection | tests.cpp:615:17:615:22 | source indirection | This 'call to strcpy' with input from $@ may overflow the destination. | main.cpp:6:27:6:30 | argv indirection | a command-line argument |

cpp/ql/test/query-tests/Security/CWE/CWE-119/semmle/tests/tests.cpp

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -628,6 +628,32 @@ void test25(char* source) {
628628
strcpy(buf, s.home); // BAD
629629
}
630630

631+
void test26(bool cond)
632+
{
633+
char buffer[100];
634+
char *ptr;
635+
int i;
636+
637+
if (buffer[-1] == 0) { return; } // BAD: accesses buffer[-1]
638+
639+
ptr = buffer;
640+
if (cond)
641+
{
642+
ptr += 1;
643+
if (ptr[-1] == 0) { return; } // GOOD: accesses buffer[0]
644+
} else {
645+
if (ptr[-1] == 0) { return; } // BAD: accesses buffer[-1]
646+
}
647+
if (ptr[-1] == 0) { return; } // BAD: accesses buffer[-1] or buffer[0] [NOT DETECTED]
648+
649+
ptr = buffer;
650+
for (i = 0; i < 2; i++)
651+
{
652+
ptr += 1;
653+
}
654+
if (ptr[-1] == 0) { return; } // GOOD: accesses buffer[1]
655+
}
656+
631657
int tests_main(int argc, char *argv[])
632658
{
633659
long long arr17[19];

0 commit comments

Comments
 (0)