|
| 1 | +#!/usr/bin/env python3 |
| 2 | +# |
| 3 | +# Copyright (c) 2018 The Bitcoin Core developers |
| 4 | +# Distributed under the MIT software license, see the accompanying |
| 5 | +# file COPYING or http://www.opensource.org/licenses/mit-license.php. |
| 6 | +# |
| 7 | +# Lint format strings: This program checks that the number of arguments passed |
| 8 | +# to a variadic format string function matches the number of format specifiers |
| 9 | +# in the format string. |
| 10 | + |
| 11 | +import argparse |
| 12 | +import re |
| 13 | +import sys |
| 14 | + |
# Known (file name, function call) pairs that main() skips instead of
# reporting. An entry only takes effect when both the file name and the
# reconstructed call text match exactly. Entries are grouped by file.
FALSE_POSITIVES = [
    (path, call)
    for path, calls in (
        ("src/dbwrapper.cpp", (
            "vsnprintf(p, limit - p, format, backup_ap)",
        )),
        ("src/index/base.cpp", (
            "FatalError(const char* fmt, const Args&... args)",
        )),
        ("src/netbase.cpp", (
            "LogConnectFailure(bool manual_connection, const char* fmt, const Args&... args)",
        )),
        ("src/util.cpp", (
            "strprintf(_(COPYRIGHT_HOLDERS), _(COPYRIGHT_HOLDERS_SUBSTITUTION))",
            "strprintf(COPYRIGHT_HOLDERS, COPYRIGHT_HOLDERS_SUBSTITUTION)",
        )),
        ("src/wallet/wallet.h", (
            "WalletLogPrintf(std::string fmt, Params... parameters)",
            "LogPrintf((\"%s \" + fmt).c_str(), GetDisplayName(), parameters...)",
        )),
    )
    for call in calls
]
| 24 | + |
| 25 | + |
def parse_function_calls(function_name, source_code):
    """Return a list with all calls to function function_name in string source_code.

    Lines whose first non-blank character is "#" (preprocessor directives) are
    dropped, and everything from a "// " comment marker to the end of its line
    is removed. The remaining lines are stripped and joined with single spaces
    before being searched.

    Each returned string starts at "function_name(" and runs to the end of the
    joined text; it is not cut off at the call's closing parenthesis.

    function_name is matched literally and only when it is not the tail of a
    longer identifier (the preceding character must not be a letter, digit or
    underscore).

    >>> len(parse_function_calls("foo", "foo();bar();foo();bar();"))
    2
    >>> parse_function_calls("foo", "foo(1);bar(1);foo(2);bar(2);")[0].startswith("foo(1);")
    True
    >>> parse_function_calls("foo", "foo(1);bar(1);foo(2);bar(2);")[1].startswith("foo(2);")
    True
    >>> len(parse_function_calls("foo", "foo();bar();// foo();bar();"))
    1
    >>> len(parse_function_calls("foo", "#define FOO foo();"))
    0
    >>> parse_function_calls("foo", "x2foo(1);")
    []
    >>> parse_function_calls("a.b", "axb(1); a.b(2);")
    ['a.b(2);']
    """
    assert(type(function_name) is str and type(source_code) is str and function_name)
    lines = [re.sub("// .*", " ", line).strip()
             for line in source_code.split("\n")
             if not line.strip().startswith("#")]
    # re.escape keeps regex metacharacters in the name (".", "+", ...) literal.
    # The call itself is captured inside a lookahead so that matches may
    # overlap: every capture extends to the end of the text.
    call_pattern = r"[^a-zA-Z0-9_](?=({}\(.*).*)".format(re.escape(function_name))
    # The leading space gives a call at the very start of the text a
    # preceding character for the boundary class to match.
    return re.findall(call_pattern, " " + " ".join(lines))
| 46 | + |
| 47 | + |
def normalize(s):
    r"""Return a normalized version of string s.

    Newlines, tabs and C style comments ("/* ... */") are replaced with
    spaces, runs of two or more spaces are collapsed into a single space, and
    leading and trailing whitespace is stripped. A "/*" without a closing "*/"
    is left in place.

    >>> normalize(" /* nothing */ foo\tfoo /* bar */ foo ")
    'foo foo foo'
    """
    assert(type(s) is str)
    s = s.replace("\n", " ").replace("\t", " ")
    # Raw string: "\*" is not a valid escape in an ordinary string literal and
    # triggers an invalid-escape-sequence warning on current Python versions.
    s = re.sub(r"/\*.*?\*/", " ", s)
    s = re.sub(" {2,}", " ", s)
    return s.strip()
| 61 | + |
| 62 | + |
# Maps each backslash escape sequence that must be hidden from the
# quote-tracking parsers to a placeholder containing neither a backslash nor
# a double quote.
ESCAPE_MAP = {
    r"\\": "[escaped-backslash]",
    r"\n": "[escaped-newline]",
    r"\t": "[escaped-tab]",
    r'\"': "[escaped-quote]",
}

# Matches any single escape sequence listed in ESCAPE_MAP. Built from the map
# so that the two cannot drift apart.
_ESCAPE_SEQUENCE_RE = re.compile("|".join(re.escape(raw_value) for raw_value in ESCAPE_MAP))


def escape(s):
    r"""Return the escaped version of string s.

    The two-character sequences backslash-backslash, backslash-n, backslash-t
    and backslash-quote are replaced with "[escaped-backslash]",
    "[escaped-newline]", "[escaped-tab]" and "[escaped-quote]" respectively.

    The string is scanned once from left to right, so an escaped backslash
    consumes both of its characters before the following character is looked
    at. A string literal ending in an escaped backslash therefore still ends
    with a plain double quote.

    >>> unescape(escape("foo")) == "foo"
    True
    >>> escape(r'foo \t foo \n foo \\ foo \ foo \"bar\"')
    'foo [escaped-tab] foo [escaped-newline] foo [escaped-backslash] foo \\ foo [escaped-quote]bar[escaped-quote]'
    >>> escape(r'"C:\\"')
    '"C:[escaped-backslash]"'
    """
    assert(type(s) is str)
    return _ESCAPE_SEQUENCE_RE.sub(lambda match: ESCAPE_MAP[match.group(0)], s)


def unescape(s):
    r"""Return the unescaped version of escaped string s.

    Reverses the replacements made in function escape(s) by turning every
    placeholder from ESCAPE_MAP back into its original escape sequence.

    >>> unescape(escape("bar"))
    'bar'
    >>> unescape("foo [escaped-tab] foo [escaped-newline] foo [escaped-backslash] foo [escaped-quote]bar[escaped-quote]")
    'foo \\t foo \\n foo \\\\ foo \\"bar\\"'
    """
    assert(type(s) is str)
    # No placeholder contains another, and none contains a backslash, so the
    # replacement order does not matter here.
    for raw_value, escaped_value in ESCAPE_MAP.items():
        s = s.replace(escaped_value, raw_value)
    return s
| 98 | + |
| 99 | + |
def parse_function_call_and_arguments(function_name, function_call):
    """Split string function_call into a list of strings consisting of:
    * function_name followed by "("
    * the function call argument #1
    * ...
    * the function call argument #n
    * the closing ")"

    function_call must start with "function_name(". Every argument except the
    last keeps its trailing comma, and whitespace after a comma stays at the
    start of the next argument. Text after the closing parenthesis of the call
    is ignored. If the call is never closed, the list ends with the text
    collected so far and has no ")" element.

    Commas and parentheses inside double-quoted strings, and commas inside
    nested parentheses, do not split arguments.

    The strings returned are in escaped form. See escape(...).

    >>> parse_function_call_and_arguments("foo", 'foo("%s", "foo");')
    ['foo(', '"%s",', ' "foo"', ')']
    >>> parse_function_call_and_arguments("foo", 'foo("%s %s", "foo", "bar");')
    ['foo(', '"%s %s",', ' "foo",', ' "bar"', ')']
    >>> parse_function_call_and_arguments("fooprintf", 'fooprintf("%050d", i);')
    ['fooprintf(', '"%050d",', ' i', ')']
    >>> parse_function_call_and_arguments("foo", 'foo(bar(foobar(barfoo("foo"))), foobar); barfoo')
    ['foo(', 'bar(foobar(barfoo("foo"))),', ' foobar', ')']
    >>> parse_function_call_and_arguments("foo", "foo()")
    ['foo(', '', ')']
    >>> parse_function_call_and_arguments("foo", "foo(123)")
    ['foo(', '123', ')']
    >>> parse_function_call_and_arguments("foo", 'foo("foo")')
    ['foo(', '"foo"', ')']
    """
    assert(type(function_name) is str and type(function_call) is str and function_name)
    text = normalize(escape(function_call))
    call_prefix = "{}(".format(function_name)
    assert(text.startswith(call_prefix))

    pieces = [call_prefix]
    current = ""  # argument text collected so far
    depth = 1  # the "(" of call_prefix is already open
    inside_quotes = False
    for ch in text[len(call_prefix):]:
        if ch == "\"":
            inside_quotes = not inside_quotes
            current += ch
        elif inside_quotes:
            current += ch
        elif ch == "(":
            depth += 1
            current += ch
        else:
            if ch == ")":
                depth -= 1
            if depth == 0:
                # Closing parenthesis of the call itself: finish the last
                # argument and stop, ignoring whatever follows.
                pieces.append(current)
                pieces.append(ch)
                return pieces
            current += ch
            if depth == 1 and ch == ",":
                # Top-level comma: the argument (comma included) is complete.
                pieces.append(current)
                current = ""
    # Ran out of text before the call was closed.
    pieces.append(current)
    return pieces
| 157 | + |
| 158 | + |
def parse_string_content(argument):
    """Return the concatenated text of all double-quoted parts of string argument.

    The argument is escaped and normalized first, so escaped quotes inside a
    string literal do not end it. Text outside quotes is discarded; adjacent
    literals are joined. An opening quote without a closing one contributes
    everything that follows it.

    >>> parse_string_content('1 "foo %d bar" 2')
    'foo %d bar'
    >>> parse_string_content('1 foobar 2')
    ''
    >>> parse_string_content('1 "bar" 2')
    'bar'
    >>> parse_string_content('1 "foo" 2 "bar" 3')
    'foobar'
    >>> parse_string_content('1 "foo" 2 " " "bar" 3')
    'foo bar'
    >>> parse_string_content('""')
    ''
    >>> parse_string_content('')
    ''
    >>> parse_string_content('1 2 3')
    ''
    """
    assert(type(argument) is str)
    segments = normalize(escape(argument)).split("\"")
    # Splitting on the quote character alternates between text outside quotes
    # (even indexes) and text inside quotes (odd indexes).
    return "".join(segments[1::2])
| 188 | + |
| 189 | + |
def count_format_specifiers(format_string):
    """Return the number of arguments consumed by the format specifiers in
    string format_string.

    Every "%" that is not part of a literal "%%" counts as one argument. Each
    "*" (width or precision taken from an argument) seen before the
    specifier's conversion character counts as one more.

    "%%" pairs are recognised from left to right, so in "%%%d" the first two
    characters form a literal percent sign and the third starts "%d".

    >>> count_format_specifiers("foo bar foo")
    0
    >>> count_format_specifiers("foo %d bar foo")
    1
    >>> count_format_specifiers("foo %d bar %i foo")
    2
    >>> count_format_specifiers("foo %d bar %i foo %% foo")
    2
    >>> count_format_specifiers("foo %d bar %i foo %% foo %d foo")
    3
    >>> count_format_specifiers("foo %d bar %i foo %% foo %*d foo")
    4
    >>> count_format_specifiers("%%%d")
    1
    >>> count_format_specifiers("%d%%%d")
    2
    """
    assert(type(format_string) is str)
    n = 0
    in_specifier = False
    # str.replace removes non-overlapping "%%" pairs from left to right, which
    # leaves at most one "%" per run of percent signs: the one that really
    # starts a specifier.
    for char in format_string.replace("%%", ""):
        if char == "%":
            in_specifier = True
            n += 1
        elif char in "aAcdeEfFgGinopsuxX":
            # A conversion character ends the current specifier.
            in_specifier = False
        elif in_specifier and char == "*":
            n += 1
    return n
| 220 | + |
| 221 | + |
def main():
    """Check every call to the given function in the given files and exit.

    For each call that is not listed in FALSE_POSITIVES, the number of
    arguments following the format string is compared with the number of
    format specifiers in it. A message is printed for each call that cannot be
    parsed or whose counts differ. The process exits with status 1 if any
    message was printed and 0 otherwise.
    """
    parser = argparse.ArgumentParser(description="This program checks that the number of arguments passed "
                                     "to a variadic format string function matches the number of format "
                                     "specifiers in the format string.")
    parser.add_argument("--skip-arguments", type=int, help="number of arguments before the format string "
                        "argument (e.g. 1 in the case of fprintf)", default=0)
    parser.add_argument("function_name", help="function name (e.g. fprintf)", default=None)
    parser.add_argument("file", type=argparse.FileType("r", encoding="utf-8"), nargs="*", help="C++ source code file (e.g. foo.cpp)")
    args = parser.parse_args()

    exit_code = 0
    for f in args.file:
        # argparse.FileType opens every file during argument parsing; close
        # each one as soon as it has been read. f.name stays usable afterwards.
        with f:
            source_code = f.read()
        for function_call_str in parse_function_calls(args.function_name, source_code):
            parts = parse_function_call_and_arguments(args.function_name, function_call_str)
            # Only the first 512 characters are used for matching and reporting.
            relevant_function_call_str = unescape("".join(parts))[:512]
            if (f.name, relevant_function_call_str) in FALSE_POSITIVES:
                continue
            # parts is [name + "(", argument..., ")"], so a usable call needs at
            # least the skipped arguments plus the format string argument.
            if len(parts) < 3 + args.skip_arguments:
                exit_code = 1
                print("{}: Could not parse function call string \"{}(...)\": {}".format(f.name, args.function_name, relevant_function_call_str))
                continue
            argument_count = len(parts) - 3 - args.skip_arguments
            format_str = parse_string_content(parts[1 + args.skip_arguments])
            format_specifier_count = count_format_specifiers(format_str)
            if format_specifier_count != argument_count:
                exit_code = 1
                print("{}: Expected {} argument(s) after format string but found {} argument(s): {}".format(f.name, format_specifier_count, argument_count, relevant_function_call_str))
    sys.exit(exit_code)


if __name__ == "__main__":
    main()
0 commit comments