|
| 1 | +#!/usr/bin/env python3 |
| 2 | +# Copyright (c) 2018-2022 The Bitcoin Core developers |
| 3 | +# Distributed under the MIT software license, see the accompanying |
| 4 | +# file COPYING or http://www.opensource.org/licenses/mit-license.php. |
| 5 | +# |
| 6 | +# Be aware that bitcoind and bitcoin-qt differ in terms of localization: Qt |
| 7 | +# opts in to POSIX localization by running setlocale(LC_ALL, "") on startup, |
| 8 | +# whereas no such call is made in bitcoind. |
| 9 | +# |
| 10 | +# Qt runs setlocale(LC_ALL, "") on initialization. This installs the locale |
| 11 | +# specified by the user's LC_ALL (or LC_*) environment variable as the new |
| 12 | +# C locale. |
| 13 | +# |
| 14 | +# In contrast, bitcoind does not opt in to localization -- no call to |
| 15 | +# setlocale(LC_ALL, "") is made and the environment variables LC_* are |
| 16 | +# thus ignored. |
| 17 | +# |
| 18 | +# This results in situations where bitcoind is guaranteed to be running |
| 19 | +# with the classic locale ("C") whereas the locale of bitcoin-qt will vary |
| 20 | +# depending on the user's environment variables. |
| 21 | +# |
| 22 | +# An example: Assuming the environment variable LC_ALL=de_DE then the |
| 23 | +# call std::to_string(1.23) will return "1.230000" in bitcoind but |
| 24 | +# "1,230000" in bitcoin-qt. |
| 25 | +# |
| 26 | +# From the Qt documentation: |
| 27 | +# "On Unix/Linux Qt is configured to use the system locale settings by default. |
| 28 | +# This can cause a conflict when using POSIX functions, for instance, when |
| 29 | +# converting between data types such as floats and strings, since the notation |
| 30 | +# may differ between locales. To get around this problem, call the POSIX function |
| 31 | +# setlocale(LC_NUMERIC,"C") right after initializing QApplication, QGuiApplication |
| 32 | +# or QCoreApplication to reset the locale that is used for number formatting to |
| 33 | +# "C"-locale." |
| 34 | +# |
| 35 | +# See https://doc.qt.io/qt-5/qcoreapplication.html#locale-settings and |
| 36 | +# https://stackoverflow.com/a/34878283 for more details. |
| 37 | +# |
| 38 | +# TODO: Reduce KNOWN_VIOLATIONS by replacing uses of locale dependent snprintf with strprintf. |
| 39 | + |
| 40 | +import re |
| 41 | +import sys |
| 42 | + |
| 43 | +from subprocess import check_output, CalledProcessError |
| 44 | + |
| 45 | + |
# Allowlist of accepted uses. Each entry is a regular expression of the form
# "<path>:.*<function>"; main() joins all entries with "|" and drops every
# git grep output line that the combined pattern matches.
KNOWN_VIOLATIONS = [
    "src/dbwrapper.cpp:.*vsnprintf",
    "src/test/dbwrapper_tests.cpp:.*snprintf",
    "src/test/fuzz/locale.cpp:.*setlocale",
    "src/test/fuzz/string.cpp:.*strtol",
    "src/test/fuzz/string.cpp:.*strtoul",
    "src/test/util_tests.cpp:.*strtoll"
]
| 54 | + |
# Paths left out of the search. find_locale_dependent_function_uses() prefixes
# each entry with ":(exclude)" and hands it to git grep as a pathspec, so
# despite the REGEXP_ prefix in the name these are not used as regular
# expressions. NOTE(review): going by the name these are external/vendored
# dependencies -- confirm before adding project-owned paths here.
REGEXP_EXTERNAL_DEPENDENCIES_EXCLUSIONS = [
    "src/crypto/ctaes/",
    "src/leveldb/",
    "src/secp256k1/",
    "src/minisketch/",
    "src/tinyformat.h",
    "src/univalue/"
]
| 63 | + |
# Function names the lint searches for in the C++ sources. Where present, the
# trailing comment names the locale category involved, the function through
# which the locale dependence arises, or the Boost function the short name
# refers to. Both users of this list (the git grep pattern and the per-name
# filter in main()) also accept an optional "_r" or "_s" suffix after each name.
LOCALE_DEPENDENT_FUNCTIONS = [
    "alphasort",  # LC_COLLATE (via strcoll)
    "asctime",  # LC_TIME (directly)
    "asprintf",  # (via vasprintf)
    "atof",  # LC_NUMERIC (via strtod)
    "atoi",  # LC_NUMERIC (via strtol)
    "atol",  # LC_NUMERIC (via strtol)
    "atoll",  # (via strtoll)
    "atoq",
    "btowc",  # LC_CTYPE (directly)
    "ctime",  # (via asctime or localtime)
    "dprintf",  # (via vdprintf)
    "fgetwc",
    "fgetws",
    "fold_case",  # boost::locale::fold_case
    "fprintf",  # (via vfprintf)
    "fputwc",
    "fputws",
    "fscanf",  # (via __vfscanf)
    "fwprintf",  # (via __vfwprintf)
    "getdate",  # via __getdate_r => isspace // __localtime_r
    "getwc",
    "getwchar",
    "is_digit",  # boost::algorithm::is_digit
    "is_space",  # boost::algorithm::is_space
    "isalnum",  # LC_CTYPE
    "isalpha",  # LC_CTYPE
    "isblank",  # LC_CTYPE
    "iscntrl",  # LC_CTYPE
    "isctype",  # LC_CTYPE
    "isdigit",  # LC_CTYPE
    "isgraph",  # LC_CTYPE
    "islower",  # LC_CTYPE
    "isprint",  # LC_CTYPE
    "ispunct",  # LC_CTYPE
    "isspace",  # LC_CTYPE
    "isupper",  # LC_CTYPE
    "iswalnum",  # LC_CTYPE
    "iswalpha",  # LC_CTYPE
    "iswblank",  # LC_CTYPE
    "iswcntrl",  # LC_CTYPE
    "iswctype",  # LC_CTYPE
    "iswdigit",  # LC_CTYPE
    "iswgraph",  # LC_CTYPE
    "iswlower",  # LC_CTYPE
    "iswprint",  # LC_CTYPE
    "iswpunct",  # LC_CTYPE
    "iswspace",  # LC_CTYPE
    "iswupper",  # LC_CTYPE
    "iswxdigit",  # LC_CTYPE
    "isxdigit",  # LC_CTYPE
    "localeconv",  # LC_NUMERIC + LC_MONETARY
    "mblen",  # LC_CTYPE
    "mbrlen",
    "mbrtowc",
    "mbsinit",
    "mbsnrtowcs",
    "mbsrtowcs",
    "mbstowcs",  # LC_CTYPE
    "mbtowc",  # LC_CTYPE
    "mktime",
    "normalize",  # boost::locale::normalize
    "printf",  # LC_NUMERIC
    "putwc",
    "putwchar",
    "scanf",  # LC_NUMERIC
    "setlocale",
    "snprintf",
    "sprintf",
    "sscanf",
    "std::locale::global",
    "std::to_string",
    "stod",
    "stof",
    "stoi",
    "stol",
    "stold",
    "stoll",
    "stoul",
    "stoull",
    "strcasecmp",
    "strcasestr",
    "strcoll",  # LC_COLLATE
    # The strerror entry below is commented out, so strerror is not checked.
    # NOTE(review): the reason is not recorded in this file -- confirm before re-enabling.
    #"strerror",
    "strfmon",
    "strftime",  # LC_TIME
    "strncasecmp",
    "strptime",
    "strtod",  # LC_NUMERIC
    "strtof",
    "strtoimax",
    "strtol",  # LC_NUMERIC
    "strtold",
    "strtoll",
    "strtoq",
    "strtoul",  # LC_NUMERIC
    "strtoull",
    "strtoumax",
    "strtouq",
    "strxfrm",  # LC_COLLATE
    "swprintf",
    "to_lower",  # boost::locale::to_lower
    "to_title",  # boost::locale::to_title
    "to_upper",  # boost::locale::to_upper
    "tolower",  # LC_CTYPE
    "toupper",  # LC_CTYPE
    "towctrans",
    "towlower",  # LC_CTYPE
    "towupper",  # LC_CTYPE
    "trim",  # boost::algorithm::trim
    "trim_left",  # boost::algorithm::trim_left
    "trim_right",  # boost::algorithm::trim_right
    "ungetwc",
    "vasprintf",
    "vdprintf",
    "versionsort",
    "vfprintf",
    "vfscanf",
    "vfwprintf",
    "vprintf",
    "vscanf",
    "vsnprintf",
    "vsprintf",
    "vsscanf",
    "vswprintf",
    "vwprintf",
    "wcrtomb",
    "wcscasecmp",
    "wcscoll",  # LC_COLLATE
    "wcsftime",  # LC_TIME
    "wcsncasecmp",
    "wcsnrtombs",
    "wcsrtombs",
    "wcstod",  # LC_NUMERIC
    "wcstof",
    "wcstoimax",
    "wcstol",  # LC_NUMERIC
    "wcstold",
    "wcstoll",
    "wcstombs",  # LC_CTYPE
    "wcstoul",  # LC_NUMERIC
    "wcstoull",
    "wcstoumax",
    "wcswidth",
    "wcsxfrm",  # LC_COLLATE
    "wctob",
    "wctomb",  # LC_CTYPE
    "wctrans",
    "wctype",
    "wcwidth",
    "wprintf"
]
| 216 | + |
| 217 | + |
def find_locale_dependent_function_uses():
    """Collect the raw ``git grep`` hits for all locale dependent functions.

    Runs ``git grep -E`` over the ``*.cpp`` and ``*.h`` pathspecs, excluding
    every path in REGEXP_EXTERNAL_DEPENDENCIES_EXCLUSIONS. A line is a hit
    when any name from LOCALE_DEPENDENT_FUNCTIONS, optionally followed by
    ``_r`` or ``_s``, appears with a character on each side that is outside
    the boundary class below (letters, digits, underscore, backtick, quotes
    and angle brackets are among the characters that prevent a hit).

    Returns:
        The output of ``git grep`` split into lines; an empty list when the
        command exits with a non-zero status of 1 or below.

    Raises:
        CalledProcessError: when ``git grep`` exits with a status above 1.
    """
    # Character class that must surround a name for it to count as a use.
    boundary = "[^a-zA-Z0-9_\\`'\"<>]"
    any_function = "|".join(LOCALE_DEPENDENT_FUNCTIONS)
    pattern = boundary + "(" + any_function + "(_r|_s)?)" + boundary

    command = ["git", "grep", "-E", pattern, "--", "*.cpp", "*.h"]
    command += [":(exclude)" + path for path in REGEXP_EXTERNAL_DEPENDENCIES_EXCLUSIONS]

    try:
        raw_output = check_output(command, universal_newlines=True, encoding="utf8")
    except CalledProcessError as e:
        # Only statuses above 1 are treated as failures; anything else is
        # reported as "no hits" (presumably git grep's no-match exit status).
        if e.returncode > 1:
            raise e
        return []

    return raw_output.splitlines()
| 231 | + |
| 232 | + |
def main():
    """Report uses of locale dependent functions and exit with a status code.

    For every name in LOCALE_DEPENDENT_FUNCTIONS the git grep output from
    find_locale_dependent_function_uses() is filtered down to lines that

    * contain the name (optionally suffixed with ``_r`` or ``_s``) between
      two boundary characters,
    * do not start, right after the ``<file>.c|.cpp|.h:`` prefix and optional
      whitespace, with ``//``, ``*``, ``/*`` or ``"`` followed later by the
      name (i.e. the line looks like a comment or string continuation), and
    * are not covered by a KNOWN_VIOLATIONS entry.

    Remaining lines are printed grouped by function name. The process exits
    with status 1 if anything was printed and 0 otherwise.

    Raises:
        SystemExit: always, carrying the exit status described above.
    """
    exit_code = 0

    # "|".join([]) is the empty pattern, which matches every line and would
    # silently suppress all findings. Treat an empty allowlist as "nothing is
    # allowlisted" instead.
    known_violations = re.compile("|".join(KNOWN_VIOLATIONS)) if KNOWN_VIOLATIONS else None
    git_grep_output = find_locale_dependent_function_uses()

    # Character class that must surround a name for it to count as a use.
    boundary = "[^a-zA-Z0-9_\\`'\"<>]"

    for locale_dependent_function in LOCALE_DEPENDENT_FUNCTIONS:
        # Compile once per function name rather than once per output line.
        use_regexp = re.compile(boundary + locale_dependent_function + "(_r|_s)?" + boundary)
        comment_or_string_regexp = re.compile(
            "\\.(c|cpp|h):\\s*(//|\\*|/\\*|\").*" + locale_dependent_function)

        matches = [line for line in git_grep_output
                   if use_regexp.search(line)
                   and not comment_or_string_regexp.search(line)
                   and (known_violations is None or not known_violations.search(line))]
        if matches:
            print(f"The locale dependent function {locale_dependent_function}(...) appears to be used:")
            for match in matches:
                print(match)
            print("")
            exit_code = 1

    if exit_code == 1:
        print("Unnecessary locale dependence can cause bugs that are very tricky to isolate and fix. Please avoid using locale dependent functions if possible.\n")
        print(f"Advice not applicable in this specific case? Add an exception by updating the ignore list in {sys.argv[0]}")

    sys.exit(exit_code)
| 256 | + |
| 257 | + |
# Run the lint only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()
0 commit comments