Skip to content

Commit 229917d

Browse files
committed
lint: Convert lint-locale-dependence.sh to Python
1 parent 094d9fd commit 229917d

File tree

2 files changed

+258
-241
lines changed

2 files changed

+258
-241
lines changed

test/lint/lint-locale-dependence.py

Lines changed: 258 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,258 @@
1+
#!/usr/bin/env python3
2+
# Copyright (c) 2018-2022 The Bitcoin Core developers
3+
# Distributed under the MIT software license, see the accompanying
4+
# file COPYING or http://www.opensource.org/licenses/mit-license.php.
5+
#
6+
# Be aware that bitcoind and bitcoin-qt differ in terms of localization: Qt
7+
# opts in to POSIX localization by running setlocale(LC_ALL, "") on startup,
8+
# whereas no such call is made in bitcoind.
9+
#
10+
# Qt runs setlocale(LC_ALL, "") on initialization. This installs the locale
11+
# specified by the user's LC_ALL (or LC_*) environment variable as the new
12+
# C locale.
13+
#
14+
# In contrast, bitcoind does not opt in to localization -- no call to
15+
# setlocale(LC_ALL, "") is made and the environment variables LC_* are
16+
# thus ignored.
17+
#
18+
# This results in situations where bitcoind is guaranteed to be running
19+
# with the classic locale ("C") whereas the locale of bitcoin-qt will vary
20+
# depending on the user's environment variables.
21+
#
22+
# An example: Assuming the environment variable LC_ALL=de_DE then the
23+
# call std::to_string(1.23) will return "1.230000" in bitcoind but
24+
# "1,230000" in bitcoin-qt.
25+
#
26+
# From the Qt documentation:
27+
# "On Unix/Linux Qt is configured to use the system locale settings by default.
28+
# This can cause a conflict when using POSIX functions, for instance, when
29+
# converting between data types such as floats and strings, since the notation
30+
# may differ between locales. To get around this problem, call the POSIX function
31+
# setlocale(LC_NUMERIC,"C") right after initializing QApplication, QGuiApplication
32+
# or QCoreApplication to reset the locale that is used for number formatting to
33+
# "C"-locale."
34+
#
35+
# See https://doc.qt.io/qt-5/qcoreapplication.html#locale-settings and
36+
# https://stackoverflow.com/a/34878283 for more details.
37+
#
38+
# TODO: Reduce KNOWN_VIOLATIONS by replacing uses of locale dependent snprintf with strprintf.
39+
40+
import re
41+
import sys
42+
43+
from subprocess import check_output, CalledProcessError
44+
45+
46+
# Regular expressions matched against the `git grep` output lines. A line that
# matches any of them is treated as a known violation and is not reported.
KNOWN_VIOLATIONS = [
    "src/dbwrapper.cpp:.*vsnprintf",
    "src/test/dbwrapper_tests.cpp:.*snprintf",
    "src/test/fuzz/locale.cpp",
    "src/test/fuzz/string.cpp",
    "src/test/util_tests.cpp",
]
53+
54+
# Paths handed to `git grep` as ":(exclude)" pathspecs, so that matches inside
# them are never reported.
REGEXP_EXTERNAL_DEPENDENCIES_EXCLUSIONS = [
    "src/crypto/ctaes/",
    "src/leveldb/",
    "src/secp256k1/",
    "src/minisketch/",
    "src/tinyformat.h",
    "src/univalue/",
]
62+
63+
# Names of the functions this lint searches for. Where present, the trailing
# comment records the locale category involved or the function it goes through.
LOCALE_DEPENDENT_FUNCTIONS = [
    "alphasort",    # LC_COLLATE (via strcoll)
    "asctime",      # LC_TIME (directly)
    "asprintf",     # (via vasprintf)
    "atof",         # LC_NUMERIC (via strtod)
    "atoi",         # LC_NUMERIC (via strtol)
    "atol",         # LC_NUMERIC (via strtol)
    "atoll",        # (via strtoll)
    "atoq",
    "btowc",        # LC_CTYPE (directly)
    "ctime",        # (via asctime or localtime)
    "dprintf",      # (via vdprintf)
    "fgetwc",
    "fgetws",
    "fold_case",    # boost::locale::fold_case
    "fprintf",      # (via vfprintf)
    "fputwc",
    "fputws",
    "fscanf",       # (via __vfscanf)
    "fwprintf",     # (via __vfwprintf)
    "getdate",      # via __getdate_r => isspace // __localtime_r
    "getwc",
    "getwchar",
    "is_digit",     # boost::algorithm::is_digit
    "is_space",     # boost::algorithm::is_space
    "isalnum",      # LC_CTYPE
    "isalpha",      # LC_CTYPE
    "isblank",      # LC_CTYPE
    "iscntrl",      # LC_CTYPE
    "isctype",      # LC_CTYPE
    "isdigit",      # LC_CTYPE
    "isgraph",      # LC_CTYPE
    "islower",      # LC_CTYPE
    "isprint",      # LC_CTYPE
    "ispunct",      # LC_CTYPE
    "isspace",      # LC_CTYPE
    "isupper",      # LC_CTYPE
    "iswalnum",     # LC_CTYPE
    "iswalpha",     # LC_CTYPE
    "iswblank",     # LC_CTYPE
    "iswcntrl",     # LC_CTYPE
    "iswctype",     # LC_CTYPE
    "iswdigit",     # LC_CTYPE
    "iswgraph",     # LC_CTYPE
    "iswlower",     # LC_CTYPE
    "iswprint",     # LC_CTYPE
    "iswpunct",     # LC_CTYPE
    "iswspace",     # LC_CTYPE
    "iswupper",     # LC_CTYPE
    "iswxdigit",    # LC_CTYPE
    "isxdigit",     # LC_CTYPE
    "localeconv",   # LC_NUMERIC + LC_MONETARY
    "mblen",        # LC_CTYPE
    "mbrlen",
    "mbrtowc",
    "mbsinit",
    "mbsnrtowcs",
    "mbsrtowcs",
    "mbstowcs",     # LC_CTYPE
    "mbtowc",       # LC_CTYPE
    "mktime",
    "normalize",    # boost::locale::normalize
    "printf",       # LC_NUMERIC
    "putwc",
    "putwchar",
    "scanf",        # LC_NUMERIC
    "setlocale",
    "snprintf",
    "sprintf",
    "sscanf",
    "std::locale::global",
    "std::to_string",
    "stod",
    "stof",
    "stoi",
    "stol",
    "stold",
    "stoll",
    "stoul",
    "stoull",
    "strcasecmp",
    "strcasestr",
    "strcoll",      # LC_COLLATE
    # "strerror",
    "strfmon",
    "strftime",     # LC_TIME
    "strncasecmp",
    "strptime",
    "strtod",       # LC_NUMERIC
    "strtof",
    "strtoimax",
    "strtol",       # LC_NUMERIC
    "strtold",
    "strtoll",
    "strtoq",
    "strtoul",      # LC_NUMERIC
    "strtoull",
    "strtoumax",
    "strtouq",
    "strxfrm",      # LC_COLLATE
    "swprintf",
    "to_lower",     # boost::locale::to_lower
    "to_title",     # boost::locale::to_title
    "to_upper",     # boost::locale::to_upper
    "tolower",      # LC_CTYPE
    "toupper",      # LC_CTYPE
    "towctrans",
    "towlower",     # LC_CTYPE
    "towupper",     # LC_CTYPE
    "trim",         # boost::algorithm::trim
    "trim_left",    # boost::algorithm::trim_left
    "trim_right",   # boost::algorithm::trim_right
    "ungetwc",
    "vasprintf",
    "vdprintf",
    "versionsort",
    "vfprintf",
    "vfscanf",
    "vfwprintf",
    "vprintf",
    "vscanf",
    "vsnprintf",
    "vsprintf",
    "vsscanf",
    "vswprintf",
    "vwprintf",
    "wcrtomb",
    "wcscasecmp",
    "wcscoll",      # LC_COLLATE
    "wcsftime",     # LC_TIME
    "wcsncasecmp",
    "wcsnrtombs",
    "wcsrtombs",
    "wcstod",       # LC_NUMERIC
    "wcstof",
    "wcstoimax",
    "wcstol",       # LC_NUMERIC
    "wcstold",
    "wcstoll",
    "wcstombs",     # LC_CTYPE
    "wcstoul",      # LC_NUMERIC
    "wcstoull",
    "wcstoumax",
    "wcswidth",
    "wcsxfrm",      # LC_COLLATE
    "wctob",
    "wctomb",       # LC_CTYPE
    "wctrans",
    "wctype",
    "wcwidth",
    "wprintf",
]
215+
216+
217+
def find_locale_dependent_function_uses():
    """Return the `git grep` output lines that mention a locale dependent function.

    Runs `git grep -E` over "*.cpp" and "*.h", excluding every path listed in
    REGEXP_EXTERNAL_DEPENDENCIES_EXCLUSIONS. The pattern matches any name from
    LOCALE_DEPENDENT_FUNCTIONS (optionally suffixed with "_r" or "_s") that is
    surrounded by characters which cannot be part of an identifier.

    Returns:
        A list with one string per line of `git grep` output; empty when
        `git grep` exits with status 1.

    Raises:
        CalledProcessError: if `git grep` exits with any non-zero status other
            than 1, including a negative status (terminated by a signal).
    """
    boundary = "[^a-zA-Z0-9_\\`'\"<>]"
    regexp_locale_dependent_functions = "|".join(LOCALE_DEPENDENT_FUNCTIONS)
    pattern = boundary + "(" + regexp_locale_dependent_functions + "(_r|_s)?)" + boundary
    exclude_args = [":(exclude)" + excl for excl in REGEXP_EXTERNAL_DEPENDENCIES_EXCLUSIONS]
    git_grep_command = ["git", "grep", "-E", pattern, "--", "*.cpp", "*.h"] + exclude_args

    try:
        git_grep_output = check_output(git_grep_command, universal_newlines=True, encoding="utf8")
    except CalledProcessError as e:
        # Status 1 is the only failure treated as "nothing found". Everything
        # else, including negative statuses from signals, must not be reported
        # as a clean result.
        if e.returncode != 1:
            raise
        return []

    return git_grep_output.splitlines()
230+
231+
232+
def main():
    """Report uses of locale dependent functions and exit with the lint status.

    For every name in LOCALE_DEPENDENT_FUNCTIONS, the lines returned by
    find_locale_dependent_function_uses() are filtered down to those that:
      * use the name (optionally suffixed with "_r" or "_s") as a whole
        identifier,
      * do not mention it after a leading comment marker or string quote, and
      * do not match any KNOWN_VIOLATIONS pattern.

    Remaining lines are printed per function. The process exits with status 1
    if anything was printed and 0 otherwise.
    """
    exit_code = 0

    # Joining an empty list yields "", a pattern that matches every line and
    # would silently suppress all reports; only filter when there is
    # something to ignore.
    known_violations = re.compile("|".join(KNOWN_VIOLATIONS)) if KNOWN_VIOLATIONS else None
    boundary = "[^a-zA-Z0-9_\\`'\"<>]"
    git_grep_output = find_locale_dependent_function_uses()

    for locale_dependent_function in LOCALE_DEPENDENT_FUNCTIONS:
        # Compile once per function instead of once per candidate line.
        use_re = re.compile(boundary + locale_dependent_function + "(_r|_s)?" + boundary)
        # Lines whose content starts with //, *, /* or " before the name.
        comment_or_string_re = re.compile("\\.(c|cpp|h):\\s*(//|\\*|/\\*|\").*" + locale_dependent_function)

        matches = [
            line for line in git_grep_output
            if use_re.search(line)
            and not comment_or_string_re.search(line)
            and not (known_violations is not None and known_violations.search(line))
        ]
        if not matches:
            continue

        print(f"The locale dependent function {locale_dependent_function}(...) appears to be used:")
        for match in matches:
            print(match)
        print("")
        exit_code = 1

    if exit_code == 1:
        print("Unnecessary locale dependence can cause bugs that are very tricky to isolate and fix. Please avoid using locale dependent functions if possible.\n")
        print(f"Advice not applicable in this specific case? Add an exception by updating the ignore list in {sys.argv[0]}")

    sys.exit(exit_code)
255+
256+
257+
# Run the lint only when this file is executed as a script.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)