Skip to content

Commit 7134327

Browse files
committed
Merge bitcoin/bitcoin#24932: lint: Convert lint-locale-dependence.sh to Python
3043a1b lint: Make known violations more specific in lint-locale-dependence (Dimitri) 229917d lint: Convert lint-locale-dependence.sh to Python (Dimitri) Pull request description: A port of `test/lint/lint-locale-dependence.sh` to a Python-script as part of the request of #24783. Checked for output-consistency. ACKs for top commit: laanwj: Tested and code review ACK 3043a1b Tree-SHA512: 80555cf7aac156bab5488f85098731d1c12a42667fe7d0df0c35487ab8fc951654a70a15351a759282eabab8319f5aabd8bdb153412b9edc3a9033bef64fd609
2 parents e88a52e + 3043a1b commit 7134327

File tree

2 files changed

+259
-241
lines changed

2 files changed

+259
-241
lines changed

test/lint/lint-locale-dependence.py

Lines changed: 259 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,259 @@
1+
#!/usr/bin/env python3
2+
# Copyright (c) 2018-2022 The Bitcoin Core developers
3+
# Distributed under the MIT software license, see the accompanying
4+
# file COPYING or http://www.opensource.org/licenses/mit-license.php.
5+
#
6+
# Be aware that bitcoind and bitcoin-qt differ in terms of localization: Qt
7+
# opts in to POSIX localization by running setlocale(LC_ALL, "") on startup,
8+
# whereas no such call is made in bitcoind.
9+
#
10+
# Qt runs setlocale(LC_ALL, "") on initialization. This installs the locale
11+
# specified by the user's LC_ALL (or LC_*) environment variable as the new
12+
# C locale.
13+
#
14+
# In contrast, bitcoind does not opt in to localization -- no call to
15+
# setlocale(LC_ALL, "") is made and the environment variables LC_* are
16+
# thus ignored.
17+
#
18+
# This results in situations where bitcoind is guaranteed to be running
19+
# with the classic locale ("C") whereas the locale of bitcoin-qt will vary
20+
# depending on the user's environment variables.
21+
#
22+
# An example: Assuming the environment variable LC_ALL=de_DE then the
23+
# call std::to_string(1.23) will return "1.230000" in bitcoind but
24+
# "1,230000" in bitcoin-qt.
25+
#
26+
# From the Qt documentation:
27+
# "On Unix/Linux Qt is configured to use the system locale settings by default.
28+
# This can cause a conflict when using POSIX functions, for instance, when
29+
# converting between data types such as floats and strings, since the notation
30+
# may differ between locales. To get around this problem, call the POSIX function
31+
# setlocale(LC_NUMERIC,"C") right after initializing QApplication, QGuiApplication
32+
# or QCoreApplication to reset the locale that is used for number formatting to
33+
# "C"-locale."
34+
#
35+
# See https://doc.qt.io/qt-5/qcoreapplication.html#locale-settings and
36+
# https://stackoverflow.com/a/34878283 for more details.
37+
#
38+
# TODO: Reduce KNOWN_VIOLATIONS by replacing uses of locale dependent snprintf with strprintf.
39+
40+
import re
41+
import sys
42+
43+
from subprocess import check_output, CalledProcessError
44+
45+
46+
# Regular expressions describing accepted uses of locale dependent functions.
# They are joined with "|" and searched against each line of `git grep`
# output, so a line matching any entry is not reported.
KNOWN_VIOLATIONS = [
    "src/dbwrapper.cpp:.*vsnprintf",
    "src/test/dbwrapper_tests.cpp:.*snprintf",
    "src/test/fuzz/locale.cpp:.*setlocale",
    "src/test/fuzz/string.cpp:.*strtol",
    "src/test/fuzz/string.cpp:.*strtoul",
    "src/test/util_tests.cpp:.*strtoll",
]
54+
55+
# Paths left out of the search. Each entry is passed to `git grep` as a
# ":(exclude)<path>" pathspec.
REGEXP_EXTERNAL_DEPENDENCIES_EXCLUSIONS = [
    "src/crypto/ctaes/",
    "src/leveldb/",
    "src/secp256k1/",
    "src/minisketch/",
    "src/tinyformat.h",
    "src/univalue/",
]
63+
64+
# Names of the functions this lint looks for. Each name is spliced verbatim
# into the regular expressions used for searching, where an optional "_r" or
# "_s" suffix is accepted as well.
LOCALE_DEPENDENT_FUNCTIONS = [
    "alphasort",    # LC_COLLATE (via strcoll)
    "asctime",      # LC_TIME (directly)
    "asprintf",     # (via vasprintf)
    "atof",         # LC_NUMERIC (via strtod)
    "atoi",         # LC_NUMERIC (via strtol)
    "atol",         # LC_NUMERIC (via strtol)
    "atoll",        # (via strtoll)
    "atoq",
    "btowc",        # LC_CTYPE (directly)
    "ctime",        # (via asctime or localtime)
    "dprintf",      # (via vdprintf)
    "fgetwc",
    "fgetws",
    "fold_case",    # boost::locale::fold_case
    "fprintf",      # (via vfprintf)
    "fputwc",
    "fputws",
    "fscanf",       # (via __vfscanf)
    "fwprintf",     # (via __vfwprintf)
    "getdate",      # via __getdate_r => isspace // __localtime_r
    "getwc",
    "getwchar",
    "is_digit",     # boost::algorithm::is_digit
    "is_space",     # boost::algorithm::is_space
    "isalnum",      # LC_CTYPE
    "isalpha",      # LC_CTYPE
    "isblank",      # LC_CTYPE
    "iscntrl",      # LC_CTYPE
    "isctype",      # LC_CTYPE
    "isdigit",      # LC_CTYPE
    "isgraph",      # LC_CTYPE
    "islower",      # LC_CTYPE
    "isprint",      # LC_CTYPE
    "ispunct",      # LC_CTYPE
    "isspace",      # LC_CTYPE
    "isupper",      # LC_CTYPE
    "iswalnum",     # LC_CTYPE
    "iswalpha",     # LC_CTYPE
    "iswblank",     # LC_CTYPE
    "iswcntrl",     # LC_CTYPE
    "iswctype",     # LC_CTYPE
    "iswdigit",     # LC_CTYPE
    "iswgraph",     # LC_CTYPE
    "iswlower",     # LC_CTYPE
    "iswprint",     # LC_CTYPE
    "iswpunct",     # LC_CTYPE
    "iswspace",     # LC_CTYPE
    "iswupper",     # LC_CTYPE
    "iswxdigit",    # LC_CTYPE
    "isxdigit",     # LC_CTYPE
    "localeconv",   # LC_NUMERIC + LC_MONETARY
    "mblen",        # LC_CTYPE
    "mbrlen",
    "mbrtowc",
    "mbsinit",
    "mbsnrtowcs",
    "mbsrtowcs",
    "mbstowcs",     # LC_CTYPE
    "mbtowc",       # LC_CTYPE
    "mktime",
    "normalize",    # boost::locale::normalize
    "printf",       # LC_NUMERIC
    "putwc",
    "putwchar",
    "scanf",        # LC_NUMERIC
    "setlocale",
    "snprintf",
    "sprintf",
    "sscanf",
    "std::locale::global",
    "std::to_string",
    "stod",
    "stof",
    "stoi",
    "stol",
    "stold",
    "stoll",
    "stoul",
    "stoull",
    "strcasecmp",
    "strcasestr",
    "strcoll",      # LC_COLLATE
    # "strerror",
    "strfmon",
    "strftime",     # LC_TIME
    "strncasecmp",
    "strptime",
    "strtod",       # LC_NUMERIC
    "strtof",
    "strtoimax",
    "strtol",       # LC_NUMERIC
    "strtold",
    "strtoll",
    "strtoq",
    "strtoul",      # LC_NUMERIC
    "strtoull",
    "strtoumax",
    "strtouq",
    "strxfrm",      # LC_COLLATE
    "swprintf",
    "to_lower",     # boost::locale::to_lower
    "to_title",     # boost::locale::to_title
    "to_upper",     # boost::locale::to_upper
    "tolower",      # LC_CTYPE
    "toupper",      # LC_CTYPE
    "towctrans",
    "towlower",     # LC_CTYPE
    "towupper",     # LC_CTYPE
    "trim",         # boost::algorithm::trim
    "trim_left",    # boost::algorithm::trim_left
    "trim_right",   # boost::algorithm::trim_right
    "ungetwc",
    "vasprintf",
    "vdprintf",
    "versionsort",
    "vfprintf",
    "vfscanf",
    "vfwprintf",
    "vprintf",
    "vscanf",
    "vsnprintf",
    "vsprintf",
    "vsscanf",
    "vswprintf",
    "vwprintf",
    "wcrtomb",
    "wcscasecmp",
    "wcscoll",      # LC_COLLATE
    "wcsftime",     # LC_TIME
    "wcsncasecmp",
    "wcsnrtombs",
    "wcsrtombs",
    "wcstod",       # LC_NUMERIC
    "wcstof",
    "wcstoimax",
    "wcstol",       # LC_NUMERIC
    "wcstold",
    "wcstoll",
    "wcstombs",     # LC_CTYPE
    "wcstoul",      # LC_NUMERIC
    "wcstoull",
    "wcstoumax",
    "wcswidth",
    "wcsxfrm",      # LC_COLLATE
    "wctob",
    "wctomb",       # LC_CTYPE
    "wctrans",
    "wctype",
    "wcwidth",
    "wprintf",
]
216+
217+
218+
def find_locale_dependent_function_uses():
    """Return the `git grep` output lines that mention a listed function.

    Searches *.cpp and *.h files, minus the paths listed in
    REGEXP_EXTERNAL_DEPENDENCIES_EXCLUSIONS, for any name from
    LOCALE_DEPENDENT_FUNCTIONS (optionally suffixed with "_r" or "_s") that
    is delimited on both sides by a character outside the boundary class
    below.

    Returns:
        A list with one string per line of `git grep` output; empty when the
        command fails with an exit status of 1 or lower (presumably the
        "no match" status, following grep's convention).

    Raises:
        CalledProcessError: if `git grep` exits with a status greater than 1.
    """
    # A name only counts when the neighbouring characters are not identifier
    # characters, backslash, backtick, quotes or angle brackets.
    boundary = "[^a-zA-Z0-9_\\`'\"<>]"
    names = "|".join(LOCALE_DEPENDENT_FUNCTIONS)
    pattern = boundary + "(" + names + "(_r|_s)?)" + boundary

    command = ["git", "grep", "-E", pattern, "--", "*.cpp", "*.h"]
    command.extend(":(exclude)" + path for path in REGEXP_EXTERNAL_DEPENDENCIES_EXCLUSIONS)

    try:
        raw_output = check_output(command, universal_newlines=True, encoding="utf8")
    except CalledProcessError as e:
        if e.returncode > 1:
            raise
        # Status 1 (or lower): treated as "nothing found".
        return []

    return raw_output.splitlines()
231+
232+
233+
def main():
    """Print every unapproved use of a locale dependent function and exit.

    The lines returned by find_locale_dependent_function_uses() are first
    stripped of everything matching KNOWN_VIOLATIONS. For each name in
    LOCALE_DEPENDENT_FUNCTIONS the remaining lines are then reported when
    they contain the name (optionally suffixed with "_r" or "_s") as a
    delimited token, unless the line content starts with "//", "*", "/*" or
    a double quote before the name, i.e. looks like a comment or string.

    Exits the process with status 1 if anything was reported, otherwise 0.
    """
    exit_code = 0

    candidate_lines = find_locale_dependent_function_uses()

    # The ignore list does not depend on the function being checked, so it is
    # applied once here instead of once per function name. An empty list must
    # be skipped: joining it yields the pattern "", which matches every line
    # and would silently hide all findings.
    if KNOWN_VIOLATIONS:
        known_violation = re.compile("|".join(KNOWN_VIOLATIONS))
        candidate_lines = [line for line in candidate_lines
                           if not known_violation.search(line)]

    for locale_dependent_function in LOCALE_DEPENDENT_FUNCTIONS:
        # The name must be delimited by non-identifier characters.
        use = re.compile("[^a-zA-Z0-9_\\`'\"<>]" + locale_dependent_function + "(_r|_s)?[^a-zA-Z0-9_\\`'\"<>]")
        # "<file>.<ext>:" followed by a comment marker or an opening quote.
        comment_or_string = re.compile("\\.(c|cpp|h):\\s*(//|\\*|/\\*|\").*" + locale_dependent_function)

        matches = [line for line in candidate_lines
                   if use.search(line) and not comment_or_string.search(line)]
        if matches:
            print(f"The locale dependent function {locale_dependent_function}(...) appears to be used:")
            for match in matches:
                print(match)
            print("")
            exit_code = 1

    if exit_code == 1:
        print("Unnecessary locale dependence can cause bugs that are very tricky to isolate and fix. Please avoid using locale dependent functions if possible.\n")
        print(f"Advice not applicable in this specific case? Add an exception by updating the ignore list in {sys.argv[0]}")

    sys.exit(exit_code)
256+
257+
258+
# Run the lint only when this file is executed as a script.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)