Skip to content

Commit a3e4556

Browse files
build: Add format string linter
This linter checks that the number of arguments passed to each variadic format string function matches the number of format specifiers in the format string.
1 parent 8c3c402 commit a3e4556

File tree

2 files changed

+292
-0
lines changed

2 files changed

+292
-0
lines changed

test/lint/lint-format-strings.py

Lines changed: 252 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,252 @@
1+
#!/usr/bin/env python3
2+
#
3+
# Copyright (c) 2018 The Bitcoin Core developers
4+
# Distributed under the MIT software license, see the accompanying
5+
# file COPYING or http://www.opensource.org/licenses/mit-license.php.
6+
#
7+
# Lint format strings: This program checks that the number of arguments passed
8+
# to a variadic format string function matches the number of format specifiers
9+
# in the format string.
10+
11+
import argparse
12+
import re
13+
import sys
14+
15+
# (file name, call text) pairs that main() skips without reporting. Judging by
# their text, each entry is either a function signature rather than a call, or
# a call whose format argument is not a plain string literal.
FALSE_POSITIVES = [
    # Format string is passed through a variable.
    ("src/dbwrapper.cpp", "vsnprintf(p, limit - p, format, backup_ap)"),
    # Function signatures, not calls.
    ("src/index/base.cpp", "FatalError(const char* fmt, const Args&... args)"),
    ("src/netbase.cpp", "LogConnectFailure(bool manual_connection, const char* fmt, const Args&... args)"),
    # Format string is not written as a string literal at the call site.
    ("src/util.cpp", "strprintf(_(COPYRIGHT_HOLDERS), _(COPYRIGHT_HOLDERS_SUBSTITUTION))"),
    ("src/util.cpp", "strprintf(COPYRIGHT_HOLDERS, COPYRIGHT_HOLDERS_SUBSTITUTION)"),
]
22+
23+
24+
def parse_function_calls(function_name, source_code):
    """Return a list with one entry per call to function_name found in source_code.

    Lines whose first non-blank character is "#" (preprocessor directives) are
    dropped, and comments introduced by "// " (two slashes and a space) are
    blanked out. The remaining lines are joined with spaces before searching.

    Each returned string starts at "function_name(" and runs to the end of the
    joined source text, so callers must cut it at the matching ")" themselves.

    function_name is matched literally (it is not interpreted as a regular
    expression) and only when it is not the tail of a longer identifier.

    >>> len(parse_function_calls("foo", "foo();bar();foo();bar();"))
    2
    >>> parse_function_calls("foo", "foo(1);bar(1);foo(2);bar(2);")[0].startswith("foo(1);")
    True
    >>> parse_function_calls("foo", "foo(1);bar(1);foo(2);bar(2);")[1].startswith("foo(2);")
    True
    >>> len(parse_function_calls("foo", "foo();bar();// foo();bar();"))
    1
    >>> len(parse_function_calls("foo", "#define FOO foo();"))
    0
    >>> len(parse_function_calls("foo", "x1foo();"))
    0
    """
    assert isinstance(function_name, str) and isinstance(source_code, str) and function_name
    lines = [re.sub("// .*", " ", line).strip()
             for line in source_code.split("\n")
             if not line.strip().startswith("#")]
    # The character before the name must not be an identifier character
    # (letters, digits, underscore); otherwise "x1foo(" would count as "foo(".
    # A leading space is prepended so a call at the very start can match too.
    pattern = r"[^a-zA-Z0-9_](?=({}\(.*).*)".format(re.escape(function_name))
    return re.findall(pattern, " " + " ".join(lines))
44+
45+
46+
def normalize(s):
    """Return a normalized version of string s with newlines, tabs and C style comments ("/* ... */")
    replaced with spaces. Multiple spaces are replaced with a single space and
    leading/trailing whitespace is stripped.

    Newlines are replaced before comments are removed, so comments spanning
    several lines are removed as well.

    >>> normalize(" /* nothing */ foo\tfoo /* bar */ foo ")
    'foo foo foo'
    """
    assert isinstance(s, str)
    s = s.replace("\n", " ").replace("\t", " ")
    # Raw string: "\*" is not a valid Python string escape, so the non-raw
    # form triggers an invalid-escape-sequence warning on current interpreters.
    s = re.sub(r"/\*.*?\*/", " ", s)
    s = re.sub(" {2,}", " ", s)
    return s.strip()
59+
60+
61+
# Maps a two-character escape sequence as it appears in source text (a
# backslash followed by one character) to the bracketed placeholder that
# escape() substitutes for it and unescape() turns back.
ESCAPE_MAP = {
    "\\n": "[escaped-newline]",
    "\\t": "[escaped-tab]",
    '\\"': "[escaped-quote]",
}
66+
67+
68+
def escape(s):
    """Return string s with the escape sequences listed in ESCAPE_MAP replaced by placeholders.

    Backslash-quote, backslash-n and backslash-t become "[escaped-quote]",
    "[escaped-newline]" and "[escaped-tab]". Every other backslash sequence,
    including an escaped backslash, is left untouched.

    The text is scanned left to right one backslash pair at a time, so the
    second backslash of an escaped backslash never combines with the character
    after it (an escaped backslash right before a closing quote keeps the
    quote intact).

    >>> unescape(escape("foo")) == "foo"
    True
    >>> escape(r'foo \\t foo \\n foo \\\\ foo \\ foo \\"bar\\"')
    'foo [escaped-tab] foo [escaped-newline] foo \\\\\\\\ foo \\\\ foo [escaped-quote]bar[escaped-quote]'
    >>> escape(r'\\\\"') == r'\\\\"'
    True
    """
    assert isinstance(s, str)

    def _placeholder(match):
        # Sequences without an ESCAPE_MAP entry are copied through unchanged.
        sequence = match.group(0)
        return ESCAPE_MAP.get(sequence, sequence)

    # NOTE: relies on every ESCAPE_MAP key being a backslash plus one character.
    return re.sub(r"\\.", _placeholder, s, flags=re.DOTALL)
81+
82+
83+
def unescape(s):
    """Return string s with every ESCAPE_MAP placeholder turned back into its escape sequence.

    This undoes the substitutions performed by escape(s).

    >>> unescape(escape("bar"))
    'bar'
    >>> unescape("foo [escaped-tab] foo [escaped-newline] foo \\\\\\\\ foo \\\\ foo [escaped-quote]bar[escaped-quote]")
    'foo \\\\t foo \\\\n foo \\\\\\\\ foo \\\\ foo \\\\"bar\\\\"'
    """
    assert(type(s) is str)
    restored = s
    for sequence in ESCAPE_MAP:
        restored = restored.replace(ESCAPE_MAP[sequence], sequence)
    return restored
96+
97+
98+
def parse_function_call_and_arguments(function_name, function_call):
    """Break the text of one call to function_name into its syntactic pieces.

    function_call must start with function_name followed by "(". It is escaped
    (see escape(...)) and then normalized (see normalize(...)) before being
    split, so the returned strings are in escaped form. The list consists of:
    * function_name followed by "("
    * one entry per top-level argument; each argument except the last keeps
      its trailing ","
    * the closing ")"

    Text after the closing parenthesis is dropped. Commas and parentheses that
    sit inside double-quoted strings or nested parentheses do not split
    arguments.

    >>> parse_function_call_and_arguments("foo", 'foo("%s", "foo");')
    ['foo(', '"%s",', ' "foo"', ')']
    >>> parse_function_call_and_arguments("foo", 'foo("%s %s", "foo", "bar");')
    ['foo(', '"%s %s",', ' "foo",', ' "bar"', ')']
    >>> parse_function_call_and_arguments("fooprintf", 'fooprintf("%050d", i);')
    ['fooprintf(', '"%050d",', ' i', ')']
    >>> parse_function_call_and_arguments("foo", 'foo(bar(foobar(barfoo("foo"))), foobar); barfoo')
    ['foo(', 'bar(foobar(barfoo("foo"))),', ' foobar', ')']
    >>> parse_function_call_and_arguments("foo", "foo()")
    ['foo(', '', ')']
    >>> parse_function_call_and_arguments("foo", "foo(123)")
    ['foo(', '123', ')']
    >>> parse_function_call_and_arguments("foo", 'foo("foo")')
    ['foo(', '"foo"', ')']
    """
    assert(type(function_name) is str and type(function_call) is str and function_name)
    call_prefix = "{}(".format(function_name)
    text = normalize(escape(function_call))
    assert(text.startswith(call_prefix))

    # pieces[-1] is always the argument currently being accumulated.
    pieces = [call_prefix, ""]
    depth = 1  # the opening parenthesis of the call itself
    inside_literal = False
    for ch in text[len(call_prefix):]:
        pieces[-1] += ch
        if ch == "\"":
            inside_literal = not inside_literal
        elif inside_literal:
            # Everything between double quotes is plain argument text.
            pass
        elif ch == "(":
            depth += 1
        else:
            if ch == ")":
                depth -= 1
                if depth == 0:
                    # End of the call: move ")" out of the last argument
                    # into an entry of its own and stop.
                    pieces[-1] = pieces[-1][:-1]
                    pieces.append(ch)
                    break
            if depth == 1 and ch == ",":
                # A top-level comma starts the next argument.
                pieces.append("")
    return pieces
155+
156+
157+
def parse_string_content(argument):
    """Return the concatenation of all double-quoted pieces of text in argument.

    The argument is escaped (see escape(...)) and normalized (see
    normalize(...)) first; the quote characters themselves and everything
    outside of them are discarded.

    >>> parse_string_content('1 "foo %d bar" 2')
    'foo %d bar'
    >>> parse_string_content('1 foobar 2')
    ''
    >>> parse_string_content('1 "bar" 2')
    'bar'
    >>> parse_string_content('1 "foo" 2 "bar" 3')
    'foobar'
    >>> parse_string_content('1 "foo" 2 " " "bar" 3')
    'foo bar'
    >>> parse_string_content('""')
    ''
    >>> parse_string_content('')
    ''
    >>> parse_string_content('1 2 3')
    ''
    """
    assert(type(argument) is str)
    collected = []
    quoted = False
    for ch in normalize(escape(argument)):
        if ch == "\"":
            # A quote only toggles the state; it is never part of the content.
            quoted = not quoted
            continue
        if quoted:
            collected.append(ch)
    return "".join(collected)
186+
187+
188+
def count_format_specifiers(format_string):
    """Return the number of arguments consumed by the format specifiers in format_string.

    Every "%" that starts a specifier counts as one, and every "*" seen while
    inside a specifier (before its conversion character) counts as one more.
    "%%" is a literal percent sign and counts as nothing. Pairs of "%" are
    consumed left to right, so a specifier directly after "%%" is still counted.

    >>> count_format_specifiers("foo bar foo")
    0
    >>> count_format_specifiers("foo %d bar foo")
    1
    >>> count_format_specifiers("foo %d bar %i foo")
    2
    >>> count_format_specifiers("foo %d bar %i foo %% foo")
    2
    >>> count_format_specifiers("foo %d bar %i foo %% foo %d foo")
    3
    >>> count_format_specifiers("foo %d bar %i foo %% foo %*d foo")
    4
    >>> count_format_specifiers("100%%%d")
    1
    """
    assert isinstance(format_string, str)
    n = 0
    in_specifier = False
    i = 0
    length = len(format_string)
    while i < length:
        char = format_string[i]
        if char == "%":
            if format_string[i + 1:i + 2] == "%":
                # Literal "%%": skip both characters so the second "%" cannot
                # pair up with a "%" that follows it.
                i += 2
                continue
            in_specifier = True
            n += 1
        elif char in "aAcdeEfFgGinopsuxX":
            # A conversion character ends the current specifier.
            in_specifier = False
        elif in_specifier and char == "*":
            # Width or precision supplied through an extra argument.
            n += 1
        i += 1
    return n
218+
219+
220+
def main():
    """Check every call to the given function in the given files and exit.

    For each call found by parse_function_calls(...) the number of arguments
    after the format string is compared with the number of format specifiers
    in the format string. Calls listed in FALSE_POSITIVES are skipped. A
    message is printed for every call that cannot be parsed or whose counts
    differ. The process exits with status 1 if any message was printed and
    with status 0 otherwise.
    """
    parser = argparse.ArgumentParser(description="This program checks that the number of arguments passed "
                                     "to a variadic format string function matches the number of format "
                                     "specifiers in the format string.")
    parser.add_argument("--skip-arguments", type=int, help="number of arguments before the format string "
                        "argument (e.g. 1 in the case of fprintf)", default=0)
    parser.add_argument("function_name", help="function name (e.g. fprintf)", default=None)
    parser.add_argument("file", type=argparse.FileType("r", encoding="utf-8"), nargs="*", help="C++ source code file (e.g. foo.cpp)")
    args = parser.parse_args()

    exit_code = 0
    for f in args.file:
        # argparse has already opened every file; close each one as soon as
        # its content has been read instead of leaving it open until exit.
        with f:
            source_code = f.read()
        for function_call_str in parse_function_calls(args.function_name, source_code):
            parts = parse_function_call_and_arguments(args.function_name, function_call_str)
            # Only the first 512 characters are kept for reporting and for
            # the FALSE_POSITIVES lookup.
            relevant_function_call_str = unescape("".join(parts))[:512]
            if (f.name, relevant_function_call_str) in FALSE_POSITIVES:
                continue
            # parts holds "name(", the arguments and ")": at least one
            # argument (the format string) is needed after the skipped ones.
            if len(parts) < 3 + args.skip_arguments:
                exit_code = 1
                print("{}: Could not parse function call string \"{}(...)\": {}".format(f.name, args.function_name, relevant_function_call_str))
                continue
            argument_count = len(parts) - 3 - args.skip_arguments
            format_str = parse_string_content(parts[1 + args.skip_arguments])
            format_specifier_count = count_format_specifiers(format_str)
            if format_specifier_count != argument_count:
                exit_code = 1
                print("{}: Expected {} argument(s) after format string but found {} argument(s): {}".format(f.name, format_specifier_count, argument_count, relevant_function_call_str))
    sys.exit(exit_code)


if __name__ == "__main__":
    main()

test/lint/lint-format-strings.sh

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
#!/usr/bin/env bash
2+
#
3+
# Copyright (c) 2018 The Bitcoin Core developers
4+
# Distributed under the MIT software license, see the accompanying
5+
# file COPYING or http://www.opensource.org/licenses/mit-license.php.
6+
#
7+
# Lint format strings: This program checks that the number of arguments passed
8+
# to a variadic format string function matches the number of format specifiers
9+
# in the format string.
10+
11+
export LC_ALL=C

# Each entry has the form "<function name>,<value passed to --skip-arguments>".
FUNCTION_NAMES_AND_NUMBER_OF_LEADING_ARGUMENTS=(
    FatalError,0
    fprintf,1
    LogConnectFailure,1
    LogPrint,1
    LogPrintf,0
    printf,0
    snprintf,2
    sprintf,1
    strprintf,0
    vfprintf,1
    vprintf,1
    vsnprintf,1
    vsprintf,1
)

EXIT_CODE=0

# Run the doctests embedded in the Python linter before using it.
python3 -m doctest test/lint/lint-format-strings.py || EXIT_CODE=1

for ENTRY in "${FUNCTION_NAMES_AND_NUMBER_OF_LEADING_ARGUMENTS[@]}"; do
    # Split "name,count" at the comma.
    FUNCTION_NAME="${ENTRY%%,*}"
    SKIP_ARGUMENTS="${ENTRY#*,}"
    # Tracked *.c, *.cpp and *.h files that mention the function name, minus
    # paths starting with src/leveldb, src/secp256k1, src/tinyformat or
    # src/univalue.
    mapfile -t MATCHING_FILES < <(git grep --full-name -l "${FUNCTION_NAME}" -- "*.c" "*.cpp" "*.h" | sort | grep -vE "^src/(leveldb|secp256k1|tinyformat|univalue)")
    test/lint/lint-format-strings.py --skip-arguments "${SKIP_ARGUMENTS}" "${FUNCTION_NAME}" "${MATCHING_FILES[@]}" || EXIT_CODE=1
done

exit "${EXIT_CODE}"

0 commit comments

Comments
 (0)