@@ -9,6 +9,83 @@ import semmle.code.cpp.models.interfaces.FormattingFunction
9
9
private import semmle.code.cpp.rangeanalysis.SimpleRangeAnalysis
10
10
private import semmle.code.cpp.rangeanalysis.RangeAnalysisUtils
11
11
12
/**
 * The underlying algebraic type enumerating the possible reasons for a
 * buffer write size estimate.
 */
private newtype TBufferWriteEstimationReason =
  TNoSpecifiedEstimateReason() or
  TTypeBoundsAnalysis() or
  TValueFlowAnalysis()
16
+
17
/**
 * The justification behind a particular estimate of a buffer write size.
 */
abstract class BufferWriteEstimationReason extends TBufferWriteEstimationReason {
  /**
   * Gets the name of the concrete class.
   */
  abstract string toString();

  /**
   * Gets a human-readable description of this reason.
   */
  abstract string getDescription();

  /**
   * Gets the reason obtained by combining this reason with `other`. This is used
   * to give a reason for the size of a format string conversion, given the
   * reasons coming from its individual specifiers.
   */
  abstract BufferWriteEstimationReason combineWith(BufferWriteEstimationReason other);
}
37
+
38
/**
 * The absence of any particular reason. It currently exists for backward
 * compatibility, so that classes derived from `BufferWrite` which override
 * `getMaxData/0` keep working with the queries as intended.
 */
class NoSpecifiedEstimateReason extends BufferWriteEstimationReason, TNoSpecifiedEstimateReason {
  override string getDescription() { result = "no reason specified" }

  override string toString() { result = "NoSpecifiedEstimateReason" }

  override BufferWriteEstimationReason combineWith(BufferWriteEstimationReason other) {
    // Format specifiers are not supposed to carry this reason, so combining it
    // with any other reason deliberately has no result.
    none()
  }
}
54
+
55
/**
 * The estimate is derived from coarse bounds implied by the type alone
 * (for instance `0 <= x < 2^32` for an unsigned 32 bit integer).
 */
class TypeBoundsAnalysis extends BufferWriteEstimationReason, TTypeBoundsAnalysis {
  override string getDescription() { result = "based on type bounds" }

  override string toString() { result = "TypeBoundsAnalysis" }

  override BufferWriteEstimationReason combineWith(BufferWriteEstimationReason other) {
    // Combining with any specified reason yields type bounds again; there is no
    // result when `other` is the unspecified reason.
    result = TTypeBoundsAnalysis() and
    not other = TNoSpecifiedEstimateReason()
  }
}
68
+
69
/**
 * The estimate is derived from non-trivial bounds established by actual flow
 * analysis. For example:
 * ```
 * unsigned u = x;
 * if (u < 1000) {
 *   //... <- estimation done here based on u
 * }
 * ```
 */
class ValueFlowAnalysis extends BufferWriteEstimationReason, TValueFlowAnalysis {
  override string getDescription() { result = "based on flow analysis of value bounds" }

  override string toString() { result = "ValueFlowAnalysis" }

  override BufferWriteEstimationReason combineWith(BufferWriteEstimationReason other) {
    // Combining with any specified reason simply yields that other reason; there
    // is no result when `other` is the unspecified reason.
    result = other and
    not other = TNoSpecifiedEstimateReason()
  }
}
88
+
12
89
/**
 * A format attribute whose archetype is `printf` or `__printf__`.
 */
class PrintfFormatAttribute extends FormatAttribute {
  PrintfFormatAttribute() {
    exists(string archetype | archetype = this.getArchetype() |
      archetype = "printf" or archetype = "__printf__"
    )
  }
}
@@ -990,7 +1067,14 @@ class FormatLiteral extends Literal {
990
1067
* conversion specifier of this format string; has no result if this cannot
991
1068
* be determined.
992
1069
*/
993
- int getMaxConvertedLength ( int n ) {
1070
int getMaxConvertedLength(int n) {
  // Take the largest estimate over every reason reported by the overload that
  // also yields the estimation reason. The receiver is written explicitly as
  // `this.` to match every other member call in this class.
  result = max(this.getMaxConvertedLength(n, _))
}
1071
+
1072
+ /**
1073
+ * Gets the maximum length of the string that can be produced by the nth
1074
+ * conversion specifier of this format string, specifying the estimation reason;
1075
+ * has no result if this cannot be determined.
1076
+ */
1077
+ int getMaxConvertedLength ( int n , BufferWriteEstimationReason reason ) {
994
1078
exists ( int len |
995
1079
(
996
1080
(
@@ -1002,10 +1086,12 @@ class FormatLiteral extends Literal {
1002
1086
) and
1003
1087
(
1004
1088
this .getConversionChar ( n ) = "%" and
1005
- len = 1
1089
+ len = 1 and
1090
+ reason = TValueFlowAnalysis ( )
1006
1091
or
1007
1092
this .getConversionChar ( n ) .toLowerCase ( ) = "c" and
1008
- len = 1 // e.g. 'a'
1093
+ len = 1 and
1094
+ reason = TValueFlowAnalysis ( ) // e.g. 'a'
1009
1095
or
1010
1096
this .getConversionChar ( n ) .toLowerCase ( ) = "f" and
1011
1097
exists ( int dot , int afterdot |
@@ -1019,7 +1105,8 @@ class FormatLiteral extends Literal {
1019
1105
afterdot = 6
1020
1106
) and
1021
1107
len = 1 + 309 + dot + afterdot
1022
- ) // e.g. -1e308="-100000"...
1108
+ ) and
1109
+ reason = TTypeBoundsAnalysis ( ) // e.g. -1e308="-100000"...
1023
1110
or
1024
1111
this .getConversionChar ( n ) .toLowerCase ( ) = "e" and
1025
1112
exists ( int dot , int afterdot |
@@ -1033,7 +1120,8 @@ class FormatLiteral extends Literal {
1033
1120
afterdot = 6
1034
1121
) and
1035
1122
len = 1 + 1 + dot + afterdot + 1 + 1 + 3
1036
- ) // -1e308="-1.000000e+308"
1123
+ ) and
1124
+ reason = TTypeBoundsAnalysis ( ) // -1e308="-1.000000e+308"
1037
1125
or
1038
1126
this .getConversionChar ( n ) .toLowerCase ( ) = "g" and
1039
1127
exists ( int dot , int afterdot |
@@ -1056,67 +1144,80 @@ class FormatLiteral extends Literal {
1056
1144
// (e.g. 123456, 0.000123456 are just OK)
1057
1145
// so case %f can be at most P characters + 4 zeroes, sign, dot = P + 6
1058
1146
len = ( afterdot .maximum ( 1 ) + 6 ) .maximum ( 1 + 1 + dot + afterdot + 1 + 1 + 3 )
1059
- ) // (e.g. "-1.59203e-319")
1147
+ ) and
1148
+ reason = TTypeBoundsAnalysis ( ) // (e.g. "-1.59203e-319")
1060
1149
or
1061
1150
this .getConversionChar ( n ) .toLowerCase ( ) = [ "d" , "i" ] and
1062
1151
// e.g. -2^31 = "-2147483648"
1063
- len =
1064
- min ( float cand |
1065
- // The first case handles length sub-specifiers
1066
- // Subtract one in the exponent because one bit is for the sign .
1067
- // Add 1 to account for the possible sign in the output.
1068
- cand = 1 + lengthInBase10 ( 2 .pow ( this .getIntegralDisplayType ( n ) .getSize ( ) * 8 - 1 ) )
1069
- or
1070
- // The second case uses range analysis to deduce a length that's shorter than the length
1071
- // of the number -2^31.
1072
- exists ( Expr arg , float lower , float upper |
1073
- arg = this . getUse ( ) . getConversionArgument ( n ) and
1074
- lower = lowerBound ( arg .getFullyConverted ( ) ) and
1075
- upper = upperBound ( arg .getFullyConverted ( ) )
1076
- |
1077
- cand =
1078
- max ( int cand0 |
1079
- // Include the sign bit in the length if it can be negative
1080
- (
1081
- if lower < 0
1082
- then cand0 = 1 + lengthInBase10 ( lower . abs ( ) )
1083
- else cand0 = lengthInBase10 ( lower )
1084
- )
1085
- or
1086
- (
1087
- if upper < 0
1088
- then cand0 = 1 + lengthInBase10 ( upper . abs ( ) )
1089
- else cand0 = lengthInBase10 ( upper )
1090
- )
1152
+ exists ( float typeBasedBound , float valueBasedBound |
1153
+ // The first case handles length sub-specifiers
1154
+ // Subtract one in the exponent because one bit is for the sign.
1155
+ // Add 1 to account for the possible sign in the output .
1156
+ typeBasedBound =
1157
+ 1 + lengthInBase10 ( 2 .pow ( this .getIntegralDisplayType ( n ) .getSize ( ) * 8 - 1 ) ) and
1158
+ // The second case uses range analysis to deduce a length that's shorter than the length
1159
+ // of the number -2^31.
1160
+ exists ( Expr arg , float lower , float upper , float typeLower , float typeUpper |
1161
+ arg = this . getUse ( ) . getConversionArgument ( n ) and
1162
+ lower = lowerBound ( arg . getFullyConverted ( ) ) and
1163
+ upper = upperBound ( arg .getFullyConverted ( ) ) and
1164
+ typeLower = exprMinVal ( arg .getFullyConverted ( ) ) and
1165
+ typeUpper = exprMaxVal ( arg . getFullyConverted ( ) )
1166
+ |
1167
+ valueBasedBound =
1168
+ max ( int cand |
1169
+ // Include the sign bit in the length if it can be negative
1170
+ (
1171
+ if lower < 0
1172
+ then cand = 1 + lengthInBase10 ( lower . abs ( ) )
1173
+ else cand = lengthInBase10 ( lower )
1174
+ )
1175
+ or
1176
+ (
1177
+ if upper < 0
1178
+ then cand = 1 + lengthInBase10 ( upper . abs ( ) )
1179
+ else cand = lengthInBase10 ( upper )
1091
1180
)
1181
+ ) and
1182
+ (
1183
+ if lower > typeLower or upper < typeUpper
1184
+ then reason = TValueFlowAnalysis ( )
1185
+ else reason = TTypeBoundsAnalysis ( )
1092
1186
)
1093
- )
1187
+ ) and
1188
+ len = valueBasedBound .minimum ( typeBasedBound )
1189
+ )
1094
1190
or
1095
1191
this .getConversionChar ( n ) .toLowerCase ( ) = "u" and
1096
1192
// e.g. 2^32 - 1 = "4294967295"
1097
- len =
1098
- min ( float cand |
1099
- // The first case handles length sub-specifiers
1100
- cand = 2 .pow ( this .getIntegralDisplayType ( n ) .getSize ( ) * 8 )
1101
- or
1102
- // The second case uses range analysis to deduce a length that's shorter than
1103
- // the length of the number 2^31 - 1.
1104
- exists ( Expr arg , float lower |
1105
- arg = this .getUse ( ) .getConversionArgument ( n ) and
1106
- lower = lowerBound ( arg .getFullyConverted ( ) )
1107
- |
1108
- cand =
1109
- max ( float cand0 |
1193
+ exists ( float typeBasedBound , float valueBasedBound |
1194
+ // The first case handles length sub-specifiers
1195
+ typeBasedBound = lengthInBase10 ( 2 .pow ( this .getIntegralDisplayType ( n ) .getSize ( ) * 8 ) - 1 ) and
1196
+ // The second case uses range analysis to deduce a length that's shorter than
1197
+ // the length of the number 2^31 - 1.
1198
+ exists ( Expr arg , float lower , float upper , float typeLower , float typeUpper |
1199
+ arg = this .getUse ( ) .getConversionArgument ( n ) and
1200
+ lower = lowerBound ( arg .getFullyConverted ( ) ) and
1201
+ upper = upperBound ( arg .getFullyConverted ( ) ) and
1202
+ typeLower = exprMinVal ( arg .getFullyConverted ( ) ) and
1203
+ typeUpper = exprMaxVal ( arg .getFullyConverted ( ) )
1204
+ |
1205
+ valueBasedBound =
1206
+ lengthInBase10 ( max ( float cand |
1110
1207
// If lower can be negative we use `(unsigned)-1` as the candidate value.
1111
1208
lower < 0 and
1112
- cand0 = 2 .pow ( any ( IntType t | t .isUnsigned ( ) ) .getSize ( ) * 8 )
1209
+ cand = 2 .pow ( any ( IntType t | t .isUnsigned ( ) ) .getSize ( ) * 8 )
1113
1210
or
1114
- cand0 = upperBound ( arg .getFullyConverted ( ) )
1115
- )
1211
+ cand = upper
1212
+ ) ) and
1213
+ (
1214
+ if lower > typeLower or upper < typeUpper
1215
+ then reason = TValueFlowAnalysis ( )
1216
+ else reason = TTypeBoundsAnalysis ( )
1116
1217
)
1117
- |
1118
- lengthInBase10 ( cand )
1119
- )
1218
+ ) and
1219
+ len = valueBasedBound . minimum ( typeBasedBound )
1220
+ )
1120
1221
or
1121
1222
this .getConversionChar ( n ) .toLowerCase ( ) = "x" and
1122
1223
// e.g. "12345678"
@@ -1135,7 +1236,8 @@ class FormatLiteral extends Literal {
1135
1236
(
1136
1237
if this .hasAlternateFlag ( n ) then len = 2 + baseLen else len = baseLen // "0x"
1137
1238
)
1138
- )
1239
+ ) and
1240
+ reason = TTypeBoundsAnalysis ( )
1139
1241
or
1140
1242
this .getConversionChar ( n ) .toLowerCase ( ) = "p" and
1141
1243
exists ( PointerType ptrType , int baseLen |
@@ -1144,7 +1246,8 @@ class FormatLiteral extends Literal {
1144
1246
(
1145
1247
if this .hasAlternateFlag ( n ) then len = 2 + baseLen else len = baseLen // "0x"
1146
1248
)
1147
- )
1249
+ ) and
1250
+ reason = TValueFlowAnalysis ( )
1148
1251
or
1149
1252
this .getConversionChar ( n ) .toLowerCase ( ) = "o" and
1150
1253
// e.g. 2^32 - 1 = "37777777777"
@@ -1163,14 +1266,16 @@ class FormatLiteral extends Literal {
1163
1266
(
1164
1267
if this .hasAlternateFlag ( n ) then len = 1 + baseLen else len = baseLen // "0"
1165
1268
)
1166
- )
1269
+ ) and
1270
+ reason = TTypeBoundsAnalysis ( )
1167
1271
or
1168
1272
this .getConversionChar ( n ) .toLowerCase ( ) = "s" and
1169
1273
len =
1170
1274
min ( int v |
1171
1275
v = this .getPrecision ( n ) or
1172
1276
v = this .getUse ( ) .getFormatArgument ( n ) .( AnalysedString ) .getMaxLength ( ) - 1 // (don't count null terminator)
1173
- )
1277
+ ) and
1278
+ reason = TValueFlowAnalysis ( )
1174
1279
)
1175
1280
)
1176
1281
}
@@ -1182,10 +1287,19 @@ class FormatLiteral extends Literal {
1182
1287
* determining whether a buffer overflow is caused by long float to string
1183
1288
* conversions.
1184
1289
*/
1185
- int getMaxConvertedLengthLimited ( int n ) {
1290
int getMaxConvertedLengthLimited(int n) {
  // Take the largest estimate over every reason reported by the overload that
  // also yields the estimation reason. The receiver is written explicitly as
  // `this.` to match every other member call in this class.
  result = max(this.getMaxConvertedLengthLimited(n, _))
}
1291
+
1292
/**
 * Gets the maximum length of the string that can be produced by the nth
 * conversion specifier of this format string, and binds `reason` to the reason
 * for that estimate. The length of an `f` conversion (matched
 * case-insensitively) is capped at 8 characters, which helps determine whether
 * a buffer overflow is caused by long float to string conversions.
 */
int getMaxConvertedLengthLimited(int n, BufferWriteEstimationReason reason) {
  exists(int uncapped | uncapped = this.getMaxConvertedLength(n, reason) |
    if this.getConversionChar(n).toLowerCase() = "f"
    then result = uncapped.minimum(8)
    else result = uncapped
  )
}
1190
1304
1191
1305
/**
@@ -1225,35 +1339,60 @@ class FormatLiteral extends Literal {
1225
1339
)
1226
1340
}
1227
1341
1228
- private int getMaxConvertedLengthAfter ( int n ) {
1342
/**
 * Gets the maximum length of the output produced from conversion specifier `n`
 * onwards, and binds `reason` to the combination of the reasons for the
 * individual specifiers involved.
 */
private int getMaxConvertedLengthAfter(int n, BufferWriteEstimationReason reason) {
  // Past the last specifier: only the constant suffix remains. The extra 1 is
  // presumably the null terminator (not established here).
  n = this.getNumConvSpec() and
  result = this.getConstantSuffix().length() + 1 and
  reason = TValueFlowAnalysis()
  or
  // Otherwise: constant part before specifier `n`, the specifier itself, then the rest.
  not n = this.getNumConvSpec() and
  exists(
    int specLen, int restLen, BufferWriteEstimationReason specReason,
    BufferWriteEstimationReason restReason
  |
    specLen = this.getMaxConvertedLength(n, specReason) and
    restLen = this.getMaxConvertedLengthAfter(n + 1, restReason) and
    result = this.getConstantPart(n).length() + specLen + restLen and
    reason = specReason.combineWith(restReason)
  )
}
1236
1353
1237
- private int getMaxConvertedLengthAfterLimited ( int n ) {
1354
/**
 * Gets the maximum length of the output produced from conversion specifier `n`
 * onwards, using the limited per-specifier estimate, and binds `reason` to the
 * combination of the reasons for the individual specifiers involved.
 */
private int getMaxConvertedLengthAfterLimited(int n, BufferWriteEstimationReason reason) {
  // Past the last specifier: only the constant suffix remains. The extra 1 is
  // presumably the null terminator (not established here).
  n = this.getNumConvSpec() and
  result = this.getConstantSuffix().length() + 1 and
  reason = TValueFlowAnalysis()
  or
  // Otherwise: constant part before specifier `n`, the specifier itself, then the rest.
  not n = this.getNumConvSpec() and
  exists(
    int specLen, int restLen, BufferWriteEstimationReason specReason,
    BufferWriteEstimationReason restReason
  |
    specLen = this.getMaxConvertedLengthLimited(n, specReason) and
    restLen = this.getMaxConvertedLengthAfterLimited(n + 1, restReason) and
    result = this.getConstantPart(n).length() + specLen + restLen and
    reason = specReason.combineWith(restReason)
  )
}
1245
1365
1246
1366
/**
 * Gets the maximum length of the string that this format string can produce,
 * regardless of the reason behind the estimate. Has no result if the length
 * cannot be determined.
 */
int getMaxConvertedLength() {
  exists(BufferWriteEstimationReason anyReason |
    result = this.getMaxConvertedLengthAfter(0, anyReason)
  )
}
1251
1371
1252
1372
/**
 * Gets the maximum length of the string that this format string can produce,
 * regardless of the reason behind the estimate, using the limited estimate in
 * which float to string conversions are assumed to be 8 characters. This helps
 * determine whether a buffer overflow is caused by long float to string
 * conversions.
 */
int getMaxConvertedLengthLimited() {
  exists(BufferWriteEstimationReason anyReason |
    result = this.getMaxConvertedLengthAfterLimited(0, anyReason)
  )
}
1379
+
1380
/**
 * Gets the maximum length of the string that this format string can produce,
 * and binds `reason` to the reason for that estimate. Has no result if no
 * estimate can be found.
 */
int getMaxConvertedLengthWithReason(BufferWriteEstimationReason reason) {
  // Start accumulating from the first conversion specifier.
  exists(int firstSpec | firstSpec = 0 |
    result = this.getMaxConvertedLengthAfter(firstSpec, reason)
  )
}
1388
+
1389
/**
 * Gets the maximum length of the string that this format string can produce,
 * and binds `reason` to the reason for that estimate, using the limited
 * estimate in which float to string conversions are assumed to be 8
 * characters. This helps determine whether a buffer overflow is caused by long
 * float to string conversions.
 */
int getMaxConvertedLengthLimitedWithReason(BufferWriteEstimationReason reason) {
  // Start accumulating from the first conversion specifier.
  exists(int firstSpec | firstSpec = 0 |
    result = this.getMaxConvertedLengthAfterLimited(firstSpec, reason)
  )
}
1259
1398
}
0 commit comments