Skip to content

Commit b087fde

Browse files
authored
Merge pull request github#17678 from MathiasVP/modernize-unclear-array-index-validation
C++: Modernize `cpp/unclear-array-index-validation`
2 parents 918e435 + 61a012f commit b087fde

File tree

6 files changed

+136
-93
lines changed

6 files changed

+136
-93
lines changed
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
---
2+
category: minorAnalysis
3+
---
4+
* The `SimpleRangeAnalysis` library (`semmle.code.cpp.rangeanalysis.SimpleRangeAnalysis`) now generates more precise ranges for calls to `fgetc` and `getc`.

cpp/ql/lib/semmle/code/cpp/rangeanalysis/SimpleRangeAnalysis.qll

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,37 @@ private class UnsignedMulExpr extends MulExpr {
192192
}
193193
}
194194

195+
/**
196+
* Gets the value of the `EOF` macro.
197+
*
198+
* This is typically `"-1"`, but this is not guaranteed to be the case on all
199+
* systems.
200+
*/
201+
private int getEofValue() {
202+
exists(MacroInvocation mi |
203+
mi.getMacroName() = "EOF" and
204+
result = unique( | | mi.getExpr().getValue().toInt())
205+
)
206+
}
207+
208+
/** Get standard `getc` function or related variants. */
209+
private class Getc extends Function {
210+
Getc() { this.hasGlobalOrStdOrBslName(["fgetc", "getc"]) }
211+
}
212+
213+
/** A call to `getc` */
214+
private class CallToGetc extends FunctionCall {
215+
CallToGetc() { this.getTarget() instanceof Getc }
216+
}
217+
218+
/**
219+
* A call to `getc` that we can analyze because we know
220+
* the value of the `EOF` macro.
221+
*/
222+
private class AnalyzableCallToGetc extends CallToGetc {
223+
AnalyzableCallToGetc() { exists(getEofValue()) }
224+
}
225+
195226
/**
196227
* Holds if `expr` is effectively a multiplication of `operand` with the
197228
* positive constant `positive`.
@@ -287,6 +318,8 @@ private predicate analyzableExpr(Expr e) {
287318
or
288319
e instanceof RemExpr
289320
or
321+
e instanceof AnalyzableCallToGetc
322+
or
290323
// A conversion is analyzable, provided that its child has an arithmetic
291324
// type. (Sometimes the child is a reference type, and so does not get
292325
// any bounds.) Rather than checking whether the type of the child is
@@ -861,6 +894,14 @@ private float getLowerBoundsImpl(Expr expr) {
861894
)
862895
)
863896
or
897+
exists(AnalyzableCallToGetc getc |
898+
expr = getc and
899+
// from https://en.cppreference.com/w/c/io/fgetc:
900+
// On success, returns the obtained character as an unsigned char
901+
// converted to an int. On failure, returns EOF.
902+
result = min([typeLowerBound(any(UnsignedCharType pct)), getEofValue()])
903+
)
904+
or
864905
// If the conversion is to an arithmetic type then we just return the
865906
// lower bound of the child. We do not need to handle truncation and
866907
// overflow here, because that is done in `getTruncatedLowerBounds`.
@@ -1055,6 +1096,14 @@ private float getUpperBoundsImpl(Expr expr) {
10551096
)
10561097
)
10571098
or
1099+
exists(AnalyzableCallToGetc getc |
1100+
expr = getc and
1101+
// from https://en.cppreference.com/w/c/io/fgetc:
1102+
// On success, returns the obtained character as an unsigned char
1103+
// converted to an int. On failure, returns EOF.
1104+
result = max([typeUpperBound(any(UnsignedCharType pct)), getEofValue()])
1105+
)
1106+
or
10581107
// If the conversion is to an arithmetic type then we just return the
10591108
// upper bound of the child. We do not need to handle truncation and
10601109
// overflow here, because that is done in `getTruncatedUpperBounds`.

cpp/ql/src/Security/CWE/CWE-129/ImproperArrayIndexValidation.ql

Lines changed: 30 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -14,102 +14,56 @@
1414

1515
import cpp
1616
import semmle.code.cpp.controlflow.IRGuards
17-
import semmle.code.cpp.security.FlowSources
18-
import semmle.code.cpp.ir.dataflow.TaintTracking
19-
import semmle.code.cpp.rangeanalysis.RangeAnalysisUtils
17+
import semmle.code.cpp.security.FlowSources as FS
18+
import semmle.code.cpp.dataflow.new.TaintTracking
19+
import semmle.code.cpp.rangeanalysis.SimpleRangeAnalysis
2020
import ImproperArrayIndexValidation::PathGraph
21-
import semmle.code.cpp.security.Security
2221

23-
predicate hasUpperBound(VariableAccess offsetExpr) {
24-
exists(BasicBlock controlled, StackVariable offsetVar, SsaDefinition def |
25-
controlled.contains(offsetExpr) and
26-
linearBoundControls(controlled, def, offsetVar) and
27-
offsetExpr = def.getAUse(offsetVar)
28-
)
22+
predicate isFlowSource(FS::FlowSource source, string sourceType) {
23+
sourceType = source.getSourceType()
2924
}
3025

31-
pragma[noinline]
32-
predicate linearBoundControls(BasicBlock controlled, SsaDefinition def, StackVariable offsetVar) {
33-
exists(GuardCondition guard, boolean branch |
34-
guard.controls(controlled, branch) and
35-
cmpWithLinearBound(guard, def.getAUse(offsetVar), Lesser(), branch)
26+
predicate guardChecks(IRGuardCondition g, Expr e, boolean branch) {
27+
exists(Operand op | op.getDef().getConvertedResultExpression() = e |
28+
// `op < k` is true and `k > 0`
29+
g.comparesLt(op, any(int k | k > 0), true, any(BooleanValue bv | bv.getValue() = branch))
30+
or
31+
// `op < _ + k` is true and `k > 0`.
32+
g.comparesLt(op, _, any(int k | k > 0), true, branch)
33+
or
34+
// op == k
35+
g.comparesEq(op, _, true, any(BooleanValue bv | bv.getValue() = branch))
36+
or
37+
// op == _ + k
38+
g.comparesEq(op, _, _, true, branch)
3639
)
3740
}
3841

39-
predicate readsVariable(LoadInstruction load, Variable var) {
40-
load.getSourceAddress().(VariableAddressInstruction).getAstVariable() = var
41-
}
42-
43-
predicate hasUpperBoundsCheck(Variable var) {
44-
exists(RelationalOperation oper, VariableAccess access |
45-
oper.getAnOperand() = access and
46-
access.getTarget() = var and
47-
// Comparing to 0 is not an upper bound check
48-
not oper.getAnOperand().getValue() = "0"
42+
/**
43+
* Holds if `arrayExpr` accesses an `ArrayType` with a constant size `N`, and
44+
* the value of `offsetExpr` is known to be smaller than `N`.
45+
*/
46+
predicate offsetIsAlwaysInBounds(ArrayExpr arrayExpr, VariableAccess offsetExpr) {
47+
exists(ArrayType arrayType |
48+
arrayType = arrayExpr.getArrayBase().getUnspecifiedType() and
49+
arrayType.getArraySize() > upperBound(offsetExpr.getFullyConverted())
4950
)
5051
}
5152

52-
predicate nodeIsBarrierEqualityCandidate(DataFlow::Node node, Operand access, Variable checkedVar) {
53-
readsVariable(node.asInstruction(), checkedVar) and
54-
any(IRGuardCondition guard).ensuresEq(access, _, _, node.asInstruction().getBlock(), true)
55-
}
56-
57-
predicate isFlowSource(FlowSource source, string sourceType) { sourceType = source.getSourceType() }
58-
59-
predicate predictableInstruction(Instruction instr) {
60-
instr instanceof ConstantInstruction
61-
or
62-
instr instanceof StringConstantInstruction
63-
or
64-
// This could be a conversion on a string literal
65-
predictableInstruction(instr.(UnaryInstruction).getUnary())
66-
}
67-
6853
module ImproperArrayIndexValidationConfig implements DataFlow::ConfigSig {
6954
predicate isSource(DataFlow::Node source) { isFlowSource(source, _) }
7055

7156
predicate isBarrier(DataFlow::Node node) {
72-
hasUpperBound(node.asExpr())
73-
or
74-
// These barriers are ported from `DefaultTaintTracking` because this query is quite noisy
75-
// otherwise.
76-
exists(Variable checkedVar |
77-
readsVariable(node.asInstruction(), checkedVar) and
78-
hasUpperBoundsCheck(checkedVar)
79-
)
80-
or
81-
exists(Variable checkedVar, Operand access |
82-
readsVariable(access.getDef(), checkedVar) and
83-
nodeIsBarrierEqualityCandidate(node, access, checkedVar)
84-
)
85-
or
86-
// Don't use dataflow into binary instructions if both operands are unpredictable
87-
exists(BinaryInstruction iTo |
88-
iTo = node.asInstruction() and
89-
not predictableInstruction(iTo.getLeft()) and
90-
not predictableInstruction(iTo.getRight()) and
91-
// propagate taint from either the pointer or the offset, regardless of predictability
92-
not iTo instanceof PointerArithmeticInstruction
93-
)
94-
or
95-
// don't use dataflow through calls to pure functions if two or more operands
96-
// are unpredictable
97-
exists(Instruction iFrom1, Instruction iFrom2, CallInstruction iTo |
98-
iTo = node.asInstruction() and
99-
isPureFunction(iTo.getStaticCallTarget().getName()) and
100-
iFrom1 = iTo.getAnArgument() and
101-
iFrom2 = iTo.getAnArgument() and
102-
not predictableInstruction(iFrom1) and
103-
not predictableInstruction(iFrom2) and
104-
iFrom1 != iFrom2
105-
)
57+
node = DataFlow::BarrierGuard<guardChecks/3>::getABarrierNode()
10658
}
10759

60+
predicate isBarrierOut(DataFlow::Node node) { isSink(node) }
61+
10862
predicate isSink(DataFlow::Node sink) {
10963
exists(ArrayExpr arrayExpr, VariableAccess offsetExpr |
11064
offsetExpr = arrayExpr.getArrayOffset() and
11165
sink.asExpr() = offsetExpr and
112-
not hasUpperBound(offsetExpr)
66+
not offsetIsAlwaysInBounds(arrayExpr, offsetExpr)
11367
)
11468
}
11569
}
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
---
2+
category: minorAnalysis
3+
---
4+
* The `cpp/unclear-array-index-validation` ("Unclear validation of array index") query has been improved to reduce false positives increase true positives.

cpp/ql/test/query-tests/Security/CWE/CWE-129/semmle/ImproperArrayIndexValidation/ImproperArrayIndexValidation.expected

Lines changed: 23 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -2,29 +2,36 @@ edges
22
| test1.c:7:26:7:29 | **argv | test1.c:8:11:8:14 | call to atoi | provenance | TaintFunction |
33
| test1.c:8:11:8:14 | call to atoi | test1.c:9:9:9:9 | i | provenance | |
44
| test1.c:8:11:8:14 | call to atoi | test1.c:11:9:11:9 | i | provenance | |
5+
| test1.c:8:11:8:14 | call to atoi | test1.c:12:9:12:9 | i | provenance | |
56
| test1.c:8:11:8:14 | call to atoi | test1.c:13:9:13:9 | i | provenance | |
6-
| test1.c:9:9:9:9 | i | test1.c:16:16:16:16 | i | provenance | |
7-
| test1.c:11:9:11:9 | i | test1.c:32:16:32:16 | i | provenance | |
8-
| test1.c:13:9:13:9 | i | test1.c:48:16:48:16 | i | provenance | |
9-
| test1.c:16:16:16:16 | i | test1.c:18:16:18:16 | i | provenance | |
10-
| test1.c:32:16:32:16 | i | test1.c:33:11:33:11 | i | provenance | |
11-
| test1.c:48:16:48:16 | i | test1.c:51:3:51:7 | ... = ... | provenance | |
12-
| test1.c:51:3:51:7 | ... = ... | test1.c:53:15:53:15 | j | provenance | |
7+
| test1.c:9:9:9:9 | i | test1.c:18:16:18:16 | i | provenance | |
8+
| test1.c:11:9:11:9 | i | test1.c:34:16:34:16 | i | provenance | |
9+
| test1.c:12:9:12:9 | i | test1.c:42:16:42:16 | i | provenance | |
10+
| test1.c:13:9:13:9 | i | test1.c:50:16:50:16 | i | provenance | |
11+
| test1.c:18:16:18:16 | i | test1.c:20:16:20:16 | i | provenance | |
12+
| test1.c:34:16:34:16 | i | test1.c:35:11:35:11 | i | provenance | |
13+
| test1.c:42:16:42:16 | i | test1.c:43:11:43:11 | i | provenance | |
14+
| test1.c:50:16:50:16 | i | test1.c:53:3:53:7 | ... = ... | provenance | |
15+
| test1.c:53:3:53:7 | ... = ... | test1.c:55:15:55:15 | j | provenance | |
1316
nodes
1417
| test1.c:7:26:7:29 | **argv | semmle.label | **argv |
1518
| test1.c:8:11:8:14 | call to atoi | semmle.label | call to atoi |
1619
| test1.c:9:9:9:9 | i | semmle.label | i |
1720
| test1.c:11:9:11:9 | i | semmle.label | i |
21+
| test1.c:12:9:12:9 | i | semmle.label | i |
1822
| test1.c:13:9:13:9 | i | semmle.label | i |
19-
| test1.c:16:16:16:16 | i | semmle.label | i |
2023
| test1.c:18:16:18:16 | i | semmle.label | i |
21-
| test1.c:32:16:32:16 | i | semmle.label | i |
22-
| test1.c:33:11:33:11 | i | semmle.label | i |
23-
| test1.c:48:16:48:16 | i | semmle.label | i |
24-
| test1.c:51:3:51:7 | ... = ... | semmle.label | ... = ... |
25-
| test1.c:53:15:53:15 | j | semmle.label | j |
24+
| test1.c:20:16:20:16 | i | semmle.label | i |
25+
| test1.c:34:16:34:16 | i | semmle.label | i |
26+
| test1.c:35:11:35:11 | i | semmle.label | i |
27+
| test1.c:42:16:42:16 | i | semmle.label | i |
28+
| test1.c:43:11:43:11 | i | semmle.label | i |
29+
| test1.c:50:16:50:16 | i | semmle.label | i |
30+
| test1.c:53:3:53:7 | ... = ... | semmle.label | ... = ... |
31+
| test1.c:55:15:55:15 | j | semmle.label | j |
2632
subpaths
2733
#select
28-
| test1.c:18:16:18:16 | i | test1.c:7:26:7:29 | **argv | test1.c:18:16:18:16 | i | An array indexing expression depends on $@ that might be outside the bounds of the array. | test1.c:7:26:7:29 | **argv | a command-line argument |
29-
| test1.c:33:11:33:11 | i | test1.c:7:26:7:29 | **argv | test1.c:33:11:33:11 | i | An array indexing expression depends on $@ that might be outside the bounds of the array. | test1.c:7:26:7:29 | **argv | a command-line argument |
30-
| test1.c:53:15:53:15 | j | test1.c:7:26:7:29 | **argv | test1.c:53:15:53:15 | j | An array indexing expression depends on $@ that might be outside the bounds of the array. | test1.c:7:26:7:29 | **argv | a command-line argument |
34+
| test1.c:20:16:20:16 | i | test1.c:7:26:7:29 | **argv | test1.c:20:16:20:16 | i | An array indexing expression depends on $@ that might be outside the bounds of the array. | test1.c:7:26:7:29 | **argv | a command-line argument |
35+
| test1.c:35:11:35:11 | i | test1.c:7:26:7:29 | **argv | test1.c:35:11:35:11 | i | An array indexing expression depends on $@ that might be outside the bounds of the array. | test1.c:7:26:7:29 | **argv | a command-line argument |
36+
| test1.c:43:11:43:11 | i | test1.c:7:26:7:29 | **argv | test1.c:43:11:43:11 | i | An array indexing expression depends on $@ that might be outside the bounds of the array. | test1.c:7:26:7:29 | **argv | a command-line argument |
37+
| test1.c:55:15:55:15 | j | test1.c:7:26:7:29 | **argv | test1.c:55:15:55:15 | j | An array indexing expression depends on $@ that might be outside the bounds of the array. | test1.c:7:26:7:29 | **argv | a command-line argument |

cpp/ql/test/query-tests/Security/CWE/CWE-129/semmle/ImproperArrayIndexValidation/test1.c

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@ int main(int argc, char *argv[]) {
1111
test3(i);
1212
test4(i);
1313
test5(i);
14+
test6(i);
15+
test7(argv[1]);
1416
}
1517

1618
void test1(int i) {
@@ -38,7 +40,7 @@ void test3(int i) {
3840
}
3941

4042
void test4(int i) {
41-
myArray[i] = 0; // BAD: i has not been validated [NOT REPORTED]
43+
myArray[i] = 0; // BAD: i has not been validated
4244

4345
if ((i < 0) || (i >= 10)) return;
4446

@@ -52,3 +54,26 @@ void test5(int i) {
5254

5355
j = myArray[j]; // BAD: j has not been validated
5456
}
57+
58+
extern int myTable[256];
59+
60+
void test6(int i) {
61+
unsigned char s = i;
62+
63+
myTable[s] = 0; // GOOD: Input is small [FALSE POSITIVE]
64+
}
65+
66+
typedef void FILE;
67+
#define EOF (-1)
68+
69+
int getc(FILE*);
70+
71+
extern int myMaxCharTable[256];
72+
73+
void test7(FILE* fp) {
74+
int ch;
75+
while ((ch = getc(fp)) != EOF) {
76+
myMaxCharTable[ch] = 0; // GOOD
77+
}
78+
}
79+

0 commit comments

Comments
 (0)