Skip to content

Commit 6fda5e8

Browse files
authored
Merge pull request #7272 from github/redsun82/cpp-overrunning-write-precision-split
C++: refactor buffer overwrite queries with estimate reasons
2 parents 1c79d1f + 0d7d60e commit 6fda5e8

File tree

5 files changed

+296
-94
lines changed

5 files changed

+296
-94
lines changed

cpp/ql/lib/semmle/code/cpp/commons/Printf.qll

Lines changed: 212 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,83 @@ import semmle.code.cpp.models.interfaces.FormattingFunction
99
private import semmle.code.cpp.rangeanalysis.SimpleRangeAnalysis
1010
private import semmle.code.cpp.rangeanalysis.RangeAnalysisUtils
1111

12+
private newtype TBufferWriteEstimationReason =
13+
TNoSpecifiedEstimateReason() or
14+
TTypeBoundsAnalysis() or
15+
TValueFlowAnalysis()
16+
17+
/**
18+
* A reason for a specific buffer write size estimate.
19+
*/
20+
abstract class BufferWriteEstimationReason extends TBufferWriteEstimationReason {
21+
/**
22+
* Returns the name of the concrete class.
23+
*/
24+
abstract string toString();
25+
26+
/**
27+
* Returns a human readable representation of this reason.
28+
*/
29+
abstract string getDescription();
30+
31+
/**
32+
* Combine estimate reasons. Used to give a reason for the size of a format string
33+
* conversion given reasons coming from its individual specifiers.
34+
*/
35+
abstract BufferWriteEstimationReason combineWith(BufferWriteEstimationReason other);
36+
}
37+
38+
/**
39+
* No particular reason given. This is currently used for backward compatibility so that
40+
* classes derived from BufferWrite and overriding `getMaxData/0` still work with the
41+
* queries as intended.
42+
*/
43+
class NoSpecifiedEstimateReason extends BufferWriteEstimationReason, TNoSpecifiedEstimateReason {
44+
override string toString() { result = "NoSpecifiedEstimateReason" }
45+
46+
override string getDescription() { result = "no reason specified" }
47+
48+
override BufferWriteEstimationReason combineWith(BufferWriteEstimationReason other) {
49+
// this reason should not be used in format specifiers, so it should not be combined
50+
// with other reasons
51+
none()
52+
}
53+
}
54+
55+
/**
56+
* The estimation comes from rough bounds just based on the type (e.g.
57+
* `0 <= x < 2^32` for an unsigned 32 bit integer).
58+
*/
59+
class TypeBoundsAnalysis extends BufferWriteEstimationReason, TTypeBoundsAnalysis {
60+
override string toString() { result = "TypeBoundsAnalysis" }
61+
62+
override string getDescription() { result = "based on type bounds" }
63+
64+
override BufferWriteEstimationReason combineWith(BufferWriteEstimationReason other) {
65+
other != TNoSpecifiedEstimateReason() and result = TTypeBoundsAnalysis()
66+
}
67+
}
68+
69+
/**
70+
* The estimation comes from non trivial bounds found via actual flow analysis.
71+
* For example
72+
* ```
73+
* unsigned u = x;
74+
* if (u < 1000) {
75+
* //... <- estimation done here based on u
76+
* }
77+
* ```
78+
*/
79+
class ValueFlowAnalysis extends BufferWriteEstimationReason, TValueFlowAnalysis {
80+
override string toString() { result = "ValueFlowAnalysis" }
81+
82+
override string getDescription() { result = "based on flow analysis of value bounds" }
83+
84+
override BufferWriteEstimationReason combineWith(BufferWriteEstimationReason other) {
85+
other != TNoSpecifiedEstimateReason() and result = other
86+
}
87+
}
88+
1289
class PrintfFormatAttribute extends FormatAttribute {
1390
PrintfFormatAttribute() { this.getArchetype() = ["printf", "__printf__"] }
1491
}
@@ -990,7 +1067,14 @@ class FormatLiteral extends Literal {
9901067
* conversion specifier of this format string; has no result if this cannot
9911068
* be determined.
9921069
*/
993-
int getMaxConvertedLength(int n) {
1070+
int getMaxConvertedLength(int n) { result = max(getMaxConvertedLength(n, _)) }
1071+
1072+
/**
1073+
* Gets the maximum length of the string that can be produced by the nth
1074+
* conversion specifier of this format string, specifying the estimation reason;
1075+
* has no result if this cannot be determined.
1076+
*/
1077+
int getMaxConvertedLength(int n, BufferWriteEstimationReason reason) {
9941078
exists(int len |
9951079
(
9961080
(
@@ -1002,10 +1086,12 @@ class FormatLiteral extends Literal {
10021086
) and
10031087
(
10041088
this.getConversionChar(n) = "%" and
1005-
len = 1
1089+
len = 1 and
1090+
reason = TValueFlowAnalysis()
10061091
or
10071092
this.getConversionChar(n).toLowerCase() = "c" and
1008-
len = 1 // e.g. 'a'
1093+
len = 1 and
1094+
reason = TValueFlowAnalysis() // e.g. 'a'
10091095
or
10101096
this.getConversionChar(n).toLowerCase() = "f" and
10111097
exists(int dot, int afterdot |
@@ -1019,7 +1105,8 @@ class FormatLiteral extends Literal {
10191105
afterdot = 6
10201106
) and
10211107
len = 1 + 309 + dot + afterdot
1022-
) // e.g. -1e308="-100000"...
1108+
) and
1109+
reason = TTypeBoundsAnalysis() // e.g. -1e308="-100000"...
10231110
or
10241111
this.getConversionChar(n).toLowerCase() = "e" and
10251112
exists(int dot, int afterdot |
@@ -1033,7 +1120,8 @@ class FormatLiteral extends Literal {
10331120
afterdot = 6
10341121
) and
10351122
len = 1 + 1 + dot + afterdot + 1 + 1 + 3
1036-
) // -1e308="-1.000000e+308"
1123+
) and
1124+
reason = TTypeBoundsAnalysis() // -1e308="-1.000000e+308"
10371125
or
10381126
this.getConversionChar(n).toLowerCase() = "g" and
10391127
exists(int dot, int afterdot |
@@ -1056,67 +1144,80 @@ class FormatLiteral extends Literal {
10561144
// (e.g. 123456, 0.000123456 are just OK)
10571145
// so case %f can be at most P characters + 4 zeroes, sign, dot = P + 6
10581146
len = (afterdot.maximum(1) + 6).maximum(1 + 1 + dot + afterdot + 1 + 1 + 3)
1059-
) // (e.g. "-1.59203e-319")
1147+
) and
1148+
reason = TTypeBoundsAnalysis() // (e.g. "-1.59203e-319")
10601149
or
10611150
this.getConversionChar(n).toLowerCase() = ["d", "i"] and
10621151
// e.g. -2^31 = "-2147483648"
1063-
len =
1064-
min(float cand |
1065-
// The first case handles length sub-specifiers
1066-
// Subtract one in the exponent because one bit is for the sign.
1067-
// Add 1 to account for the possible sign in the output.
1068-
cand = 1 + lengthInBase10(2.pow(this.getIntegralDisplayType(n).getSize() * 8 - 1))
1069-
or
1070-
// The second case uses range analysis to deduce a length that's shorter than the length
1071-
// of the number -2^31.
1072-
exists(Expr arg, float lower, float upper |
1073-
arg = this.getUse().getConversionArgument(n) and
1074-
lower = lowerBound(arg.getFullyConverted()) and
1075-
upper = upperBound(arg.getFullyConverted())
1076-
|
1077-
cand =
1078-
max(int cand0 |
1079-
// Include the sign bit in the length if it can be negative
1080-
(
1081-
if lower < 0
1082-
then cand0 = 1 + lengthInBase10(lower.abs())
1083-
else cand0 = lengthInBase10(lower)
1084-
)
1085-
or
1086-
(
1087-
if upper < 0
1088-
then cand0 = 1 + lengthInBase10(upper.abs())
1089-
else cand0 = lengthInBase10(upper)
1090-
)
1152+
exists(float typeBasedBound, float valueBasedBound |
1153+
// The first case handles length sub-specifiers
1154+
// Subtract one in the exponent because one bit is for the sign.
1155+
// Add 1 to account for the possible sign in the output.
1156+
typeBasedBound =
1157+
1 + lengthInBase10(2.pow(this.getIntegralDisplayType(n).getSize() * 8 - 1)) and
1158+
// The second case uses range analysis to deduce a length that's shorter than the length
1159+
// of the number -2^31.
1160+
exists(Expr arg, float lower, float upper, float typeLower, float typeUpper |
1161+
arg = this.getUse().getConversionArgument(n) and
1162+
lower = lowerBound(arg.getFullyConverted()) and
1163+
upper = upperBound(arg.getFullyConverted()) and
1164+
typeLower = exprMinVal(arg.getFullyConverted()) and
1165+
typeUpper = exprMaxVal(arg.getFullyConverted())
1166+
|
1167+
valueBasedBound =
1168+
max(int cand |
1169+
// Include the sign bit in the length if it can be negative
1170+
(
1171+
if lower < 0
1172+
then cand = 1 + lengthInBase10(lower.abs())
1173+
else cand = lengthInBase10(lower)
1174+
)
1175+
or
1176+
(
1177+
if upper < 0
1178+
then cand = 1 + lengthInBase10(upper.abs())
1179+
else cand = lengthInBase10(upper)
10911180
)
1181+
) and
1182+
(
1183+
if lower > typeLower or upper < typeUpper
1184+
then reason = TValueFlowAnalysis()
1185+
else reason = TTypeBoundsAnalysis()
10921186
)
1093-
)
1187+
) and
1188+
len = valueBasedBound.minimum(typeBasedBound)
1189+
)
10941190
or
10951191
this.getConversionChar(n).toLowerCase() = "u" and
10961192
// e.g. 2^32 - 1 = "4294967295"
1097-
len =
1098-
min(float cand |
1099-
// The first case handles length sub-specifiers
1100-
cand = 2.pow(this.getIntegralDisplayType(n).getSize() * 8)
1101-
or
1102-
// The second case uses range analysis to deduce a length that's shorter than
1103-
// the length of the number 2^31 - 1.
1104-
exists(Expr arg, float lower |
1105-
arg = this.getUse().getConversionArgument(n) and
1106-
lower = lowerBound(arg.getFullyConverted())
1107-
|
1108-
cand =
1109-
max(float cand0 |
1193+
exists(float typeBasedBound, float valueBasedBound |
1194+
// The first case handles length sub-specifiers
1195+
typeBasedBound = lengthInBase10(2.pow(this.getIntegralDisplayType(n).getSize() * 8) - 1) and
1196+
// The second case uses range analysis to deduce a length that's shorter than
1197+
// the length of the number 2^31 - 1.
1198+
exists(Expr arg, float lower, float upper, float typeLower, float typeUpper |
1199+
arg = this.getUse().getConversionArgument(n) and
1200+
lower = lowerBound(arg.getFullyConverted()) and
1201+
upper = upperBound(arg.getFullyConverted()) and
1202+
typeLower = exprMinVal(arg.getFullyConverted()) and
1203+
typeUpper = exprMaxVal(arg.getFullyConverted())
1204+
|
1205+
valueBasedBound =
1206+
lengthInBase10(max(float cand |
11101207
// If lower can be negative we use `(unsigned)-1` as the candidate value.
11111208
lower < 0 and
1112-
cand0 = 2.pow(any(IntType t | t.isUnsigned()).getSize() * 8)
1209+
cand = 2.pow(any(IntType t | t.isUnsigned()).getSize() * 8)
11131210
or
1114-
cand0 = upperBound(arg.getFullyConverted())
1115-
)
1211+
cand = upper
1212+
)) and
1213+
(
1214+
if lower > typeLower or upper < typeUpper
1215+
then reason = TValueFlowAnalysis()
1216+
else reason = TTypeBoundsAnalysis()
11161217
)
1117-
|
1118-
lengthInBase10(cand)
1119-
)
1218+
) and
1219+
len = valueBasedBound.minimum(typeBasedBound)
1220+
)
11201221
or
11211222
this.getConversionChar(n).toLowerCase() = "x" and
11221223
// e.g. "12345678"
@@ -1135,7 +1236,8 @@ class FormatLiteral extends Literal {
11351236
(
11361237
if this.hasAlternateFlag(n) then len = 2 + baseLen else len = baseLen // "0x"
11371238
)
1138-
)
1239+
) and
1240+
reason = TTypeBoundsAnalysis()
11391241
or
11401242
this.getConversionChar(n).toLowerCase() = "p" and
11411243
exists(PointerType ptrType, int baseLen |
@@ -1144,7 +1246,8 @@ class FormatLiteral extends Literal {
11441246
(
11451247
if this.hasAlternateFlag(n) then len = 2 + baseLen else len = baseLen // "0x"
11461248
)
1147-
)
1249+
) and
1250+
reason = TValueFlowAnalysis()
11481251
or
11491252
this.getConversionChar(n).toLowerCase() = "o" and
11501253
// e.g. 2^32 - 1 = "37777777777"
@@ -1163,14 +1266,16 @@ class FormatLiteral extends Literal {
11631266
(
11641267
if this.hasAlternateFlag(n) then len = 1 + baseLen else len = baseLen // "0"
11651268
)
1166-
)
1269+
) and
1270+
reason = TTypeBoundsAnalysis()
11671271
or
11681272
this.getConversionChar(n).toLowerCase() = "s" and
11691273
len =
11701274
min(int v |
11711275
v = this.getPrecision(n) or
11721276
v = this.getUse().getFormatArgument(n).(AnalysedString).getMaxLength() - 1 // (don't count null terminator)
1173-
)
1277+
) and
1278+
reason = TValueFlowAnalysis()
11741279
)
11751280
)
11761281
}
@@ -1182,10 +1287,19 @@ class FormatLiteral extends Literal {
11821287
* determining whether a buffer overflow is caused by long float to string
11831288
* conversions.
11841289
*/
1185-
int getMaxConvertedLengthLimited(int n) {
1290+
int getMaxConvertedLengthLimited(int n) { result = max(getMaxConvertedLengthLimited(n, _)) }
1291+
1292+
/**
1293+
* Gets the maximum length of the string that can be produced by the nth
1294+
* conversion specifier of this format string, specifying the reason for the
1295+
* estimation, except that float to string conversions are assumed to be 8
1296+
* characters. This is helpful for determining whether a buffer overflow is
1297+
* caused by long float to string conversions.
1298+
*/
1299+
int getMaxConvertedLengthLimited(int n, BufferWriteEstimationReason reason) {
11861300
if this.getConversionChar(n).toLowerCase() = "f"
1187-
then result = this.getMaxConvertedLength(n).minimum(8)
1188-
else result = this.getMaxConvertedLength(n)
1301+
then result = this.getMaxConvertedLength(n, reason).minimum(8)
1302+
else result = this.getMaxConvertedLength(n, reason)
11891303
}
11901304

11911305
/**
@@ -1225,35 +1339,60 @@ class FormatLiteral extends Literal {
12251339
)
12261340
}
12271341

1228-
private int getMaxConvertedLengthAfter(int n) {
1342+
private int getMaxConvertedLengthAfter(int n, BufferWriteEstimationReason reason) {
12291343
if n = this.getNumConvSpec()
1230-
then result = this.getConstantSuffix().length() + 1
1344+
then result = this.getConstantSuffix().length() + 1 and reason = TValueFlowAnalysis()
12311345
else
1232-
result =
1233-
this.getConstantPart(n).length() + this.getMaxConvertedLength(n) +
1234-
this.getMaxConvertedLengthAfter(n + 1)
1346+
exists(BufferWriteEstimationReason headReason, BufferWriteEstimationReason tailReason |
1347+
result =
1348+
this.getConstantPart(n).length() + this.getMaxConvertedLength(n, headReason) +
1349+
this.getMaxConvertedLengthAfter(n + 1, tailReason) and
1350+
reason = headReason.combineWith(tailReason)
1351+
)
12351352
}
12361353

1237-
private int getMaxConvertedLengthAfterLimited(int n) {
1354+
private int getMaxConvertedLengthAfterLimited(int n, BufferWriteEstimationReason reason) {
12381355
if n = this.getNumConvSpec()
1239-
then result = this.getConstantSuffix().length() + 1
1356+
then result = this.getConstantSuffix().length() + 1 and reason = TValueFlowAnalysis()
12401357
else
1241-
result =
1242-
this.getConstantPart(n).length() + this.getMaxConvertedLengthLimited(n) +
1243-
this.getMaxConvertedLengthAfterLimited(n + 1)
1358+
exists(BufferWriteEstimationReason headReason, BufferWriteEstimationReason tailReason |
1359+
result =
1360+
this.getConstantPart(n).length() + this.getMaxConvertedLengthLimited(n, headReason) +
1361+
this.getMaxConvertedLengthAfterLimited(n + 1, tailReason) and
1362+
reason = headReason.combineWith(tailReason)
1363+
)
12441364
}
12451365

12461366
/**
12471367
* Gets the maximum length of the string that can be produced by this format
12481368
* string. Has no result if this cannot be determined.
12491369
*/
1250-
int getMaxConvertedLength() { result = this.getMaxConvertedLengthAfter(0) }
1370+
int getMaxConvertedLength() { result = this.getMaxConvertedLengthAfter(0, _) }
12511371

12521372
/**
12531373
* Gets the maximum length of the string that can be produced by this format
12541374
* string, except that float to string conversions are assumed to be 8
12551375
* characters. This is helpful for determining whether a buffer overflow
12561376
* is caused by long float to string conversions.
12571377
*/
1258-
int getMaxConvertedLengthLimited() { result = this.getMaxConvertedLengthAfterLimited(0) }
1378+
int getMaxConvertedLengthLimited() { result = this.getMaxConvertedLengthAfterLimited(0, _) }
1379+
1380+
/**
1381+
* Gets the maximum length of the string that can be produced by this format
1382+
* string, specifying the reason for the estimate. Has no result if no estimate
1383+
* can be found.
1384+
*/
1385+
int getMaxConvertedLengthWithReason(BufferWriteEstimationReason reason) {
1386+
result = this.getMaxConvertedLengthAfter(0, reason)
1387+
}
1388+
1389+
/**
1390+
* Gets the maximum length of the string that can be produced by this format
1391+
* string, specifying the reason for the estimate, except that float to string
1392+
* conversions are assumed to be 8 characters. This is helpful for determining
1393+
* whether a buffer overflow is caused by long float to string conversions.
1394+
*/
1395+
int getMaxConvertedLengthLimitedWithReason(BufferWriteEstimationReason reason) {
1396+
result = this.getMaxConvertedLengthAfterLimited(0, reason)
1397+
}
12591398
}

0 commit comments

Comments
 (0)