@@ -6,6 +6,8 @@ import semmle.code.cpp.Type
6
6
import semmle.code.cpp.commons.CommonType
7
7
import semmle.code.cpp.commons.StringAnalysis
8
8
import semmle.code.cpp.models.interfaces.FormattingFunction
9
+ private import semmle.code.cpp.rangeanalysis.SimpleRangeAnalysis
10
+ private import semmle.code.cpp.rangeanalysis.RangeAnalysisUtils
9
11
10
12
class PrintfFormatAttribute extends FormatAttribute {
11
13
PrintfFormatAttribute ( ) { this .getArchetype ( ) = [ "printf" , "__printf__" ] }
@@ -268,6 +270,18 @@ class FormattingFunctionCall extends Expr {
268
270
}
269
271
}
270
272
273
+ /**
274
+ * Gets the number of digits required to represent the integer represented by `f`.
275
+ *
276
+ * `f` is assumed to be nonnegative.
277
+ */
278
+ bindingset [ f]
279
+ private int lengthInBase10 ( float f ) {
280
+ f = 0 and result = 0
281
+ or
282
+ result = f .log10 ( ) .ceil ( )
283
+ }
284
+
271
285
/**
272
286
* A class to represent format strings that occur as arguments to invocations of formatting functions.
273
287
*/
@@ -1046,39 +1060,64 @@ class FormatLiteral extends Literal {
1046
1060
or
1047
1061
this .getConversionChar ( n ) .toLowerCase ( ) = [ "d" , "i" ] and
1048
1062
// e.g. -2^31 = "-2147483648"
1049
- exists ( int sizeBits |
1050
- sizeBits =
1051
- min ( int bits |
1052
- bits = this .getIntegralDisplayType ( n ) .getSize ( ) * 8
1053
- or
1054
- exists ( IntegralType t |
1055
- t = this .getUse ( ) .getConversionArgument ( n ) .getType ( ) .getUnderlyingType ( )
1056
- |
1057
- t .isSigned ( ) and bits = t .getSize ( ) * 8
1058
- )
1059
- ) and
1060
- len = 1 + ( ( sizeBits - 1 ) / 10.0 .log2 ( ) ) .ceil ( )
1061
- // this calculation is as %u (below) only we take out the sign bit (- 1) and allow a whole
1062
- // character for it to be expressed as '-'.
1063
- )
1063
+ len =
1064
+ min ( float cand |
1065
+ // The first case handles length sub-specifiers
1066
+ // Subtract one in the exponent because one bit is for the sign.
1067
+ // Add 1 to account for the possible sign in the output.
1068
+ cand = 1 + lengthInBase10 ( 2 .pow ( this .getIntegralDisplayType ( n ) .getSize ( ) * 8 - 1 ) )
1069
+ or
1070
+ // The second case uses range analysis to deduce a length that's shorter than the length
1071
+ // of the number -2^31.
1072
+ exists ( Expr arg , float lower |
1073
+ arg = this .getUse ( ) .getConversionArgument ( n ) and
1074
+ lower = lowerBound ( arg .getFullyConverted ( ) )
1075
+ |
1076
+ cand =
1077
+ max ( int cand0 |
1078
+ (
1079
+ // Include the sign bit in the length of `lower` if it can be negative
1080
+ if lower < 0
1081
+ then cand0 = 1 + lengthInBase10 ( lower .abs ( ) )
1082
+ else cand0 = lengthInBase10 ( lower )
1083
+ )
1084
+ or
1085
+ // We don't care about the sign of `upper`: if `upper` is negative, then we know
1086
+ // `lower` is also (possibly more) negative, and thus its length will be greater.
1087
+ cand0 = lengthInBase10 ( upperBound ( arg .getFullyConverted ( ) ) )
1088
+ )
1089
+ )
1090
+ )
1064
1091
or
1065
1092
this .getConversionChar ( n ) .toLowerCase ( ) = "u" and
1066
1093
// e.g. 2^32 - 1 = "4294967295"
1067
- exists ( int sizeBits |
1068
- sizeBits =
1069
- min ( int bits |
1070
- bits = this .getIntegralDisplayType ( n ) .getSize ( ) * 8
1071
- or
1072
- exists ( IntegralType t |
1073
- t = this .getUse ( ) .getConversionArgument ( n ) .getType ( ) .getUnderlyingType ( )
1074
- |
1075
- t .isUnsigned ( ) and bits = t .getSize ( ) * 8
1076
- )
1077
- ) and
1078
- len = ( sizeBits / 10.0 .log2 ( ) ) .ceil ( )
1079
- // convert the size from bits to decimal characters, and round up as you can't have
1080
- // fractional characters (10.0.log2() is the number of bits expressed per decimal character)
1081
- )
1094
+ len =
1095
+ min ( float cand |
1096
+ // The first case handles length sub-specifiers
1097
+ cand = 2 .pow ( this .getIntegralDisplayType ( n ) .getSize ( ) * 8 )
1098
+ or
1099
+ // The second case uses range analysis to deduce a length that's shorter than
1100
+ // the length of the number 2^31 - 1.
1101
+ exists ( Expr arg , float lower |
1102
+ arg = this .getUse ( ) .getConversionArgument ( n ) and
1103
+ lower = lowerBound ( arg .getFullyConverted ( ) )
1104
+ |
1105
+ cand =
1106
+ max ( float cand0 |
1107
+ (
1108
+ if lower < 0
1109
+ then
1110
+ // Calculate the value of `(unsigned)lower`.
1111
+ cand0 = 2 .pow ( any ( IntType t | t .isUnsigned ( ) ) .getSize ( ) * 8 ) + lower
1112
+ else cand0 = lower
1113
+ )
1114
+ or
1115
+ cand0 = upperBound ( arg .getFullyConverted ( ) )
1116
+ )
1117
+ )
1118
+ |
1119
+ lengthInBase10 ( cand )
1120
+ )
1082
1121
or
1083
1122
this .getConversionChar ( n ) .toLowerCase ( ) = "x" and
1084
1123
// e.g. "12345678"
0 commit comments