From e21c5e2083c3d7c2328d513d974d1124b9a6c830 Mon Sep 17 00:00:00 2001 From: Ben Deane Date: Wed, 16 Apr 2025 10:53:30 -0600 Subject: [PATCH 1/2] :sparkles: Add typo detection/fixing for string generation Problem: - Stable strings can be fixed not just to be consistent from one build to the next, but because hardware expects certain string IDs or ranges. Accidentally having a string in code with a typo could result in another string ID being generated, with unfortunate results. Solution: - Allow typos to be detected and fixed. --- .github/workflows/unit_tests.yml | 33 +++++++++++--- .gitignore | 2 +- cmake/string_catalog.cmake | 14 +++++- test/log/CMakeLists.txt | 6 ++- test/log/catalog1_lib.cpp | 7 +++ test/log/catalog_app.cpp | 11 +++++ tools/gen_str_catalog.py | 75 +++++++++++++++++++++++++++----- tools/requirements.txt | 2 + 8 files changed, 129 insertions(+), 21 deletions(-) create mode 100644 tools/requirements.txt diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml index f8f0dd38..b76a9f56 100644 --- a/.github/workflows/unit_tests.yml +++ b/.github/workflows/unit_tests.yml @@ -126,13 +126,13 @@ jobs: - name: Install build tools run: | ${{ matrix.install }} - sudo apt install -y ninja-build + sudo apt install -y ninja-build python3-venv python3-pip - - name: Install libclang for string catalog + - name: Install python requirements for string catalog run: | python3 -m venv ${{github.workspace}}/test_venv source ${{github.workspace}}/test_venv/bin/activate - pip install libclang + pip install -r ${{github.workspace}}/tools/requirements.txt echo "${{github.workspace}}/test_venv/bin" >> $GITHUB_PATH - name: Restore CPM cache @@ -212,7 +212,14 @@ jobs: - name: Install build tools run: | ${{ matrix.install }} - sudo apt install -y ninja-build + sudo apt install -y ninja-build python3-venv python3-pip + + - name: Install python requirements for string catalog + run: | + python3 -m venv ${{github.workspace}}/test_venv + source ${{github.workspace}}/test_venv/bin/activate + pip install -r ${{github.workspace}}/tools/requirements.txt + echo "${{github.workspace}}/test_venv/bin" >> $GITHUB_PATH - name: Restore CPM cache env: @@ -332,7 +339,14 @@ jobs: - name: Install build tools run: | ${{ matrix.install }} - sudo apt install -y ninja-build + sudo apt install -y ninja-build python3-venv python3-pip + + - name: Install python requirements for string catalog + run: | + python3 -m venv ${{github.workspace}}/test_venv + source ${{github.workspace}}/test_venv/bin/activate + pip install -r ${{github.workspace}}/tools/requirements.txt + echo "${{github.workspace}}/test_venv/bin" >> $GITHUB_PATH - name: Restore CPM cache env: @@ -378,7 +392,14 @@ jobs: - name: Install build tools run: | - sudo apt update && sudo apt install -y gcc-${{env.DEFAULT_GCC_VERSION}} g++-${{env.DEFAULT_GCC_VERSION}} ninja-build valgrind + sudo apt update && sudo apt install -y gcc-${{env.DEFAULT_GCC_VERSION}} g++-${{env.DEFAULT_GCC_VERSION}} ninja-build python3-venv python3-pip valgrind + + - name: Install python requirements for string catalog + run: | + python3 -m venv ${{github.workspace}}/test_venv + source ${{github.workspace}}/test_venv/bin/activate + pip install -r ${{github.workspace}}/tools/requirements.txt + echo "${{github.workspace}}/test_venv/bin" >> $GITHUB_PATH - name: Restore CPM cache env: diff --git a/.gitignore b/.gitignore index 06b64f90..d7257e90 100644 --- a/.gitignore +++ b/.gitignore @@ -11,5 +11,5 @@ CMakePresets.json /toolchains mull.yml -requirements.txt +/requirements.txt docs/puppeteer_config.json diff --git a/cmake/string_catalog.cmake b/cmake/string_catalog.cmake index b4f38ac2..ff873f83 100644 --- a/cmake/string_catalog.cmake +++ b/cmake/string_catalog.cmake @@ -10,7 +10,9 @@ function(gen_str_catalog) VERSION GUID_ID GUID_MASK - MODULE_ID_MAX) + MODULE_ID_MAX + STABLE_TYPO_DISTANCE + TYPO_DETECT) set(multiValueArgs INPUT_JSON INPUT_LIBS INPUT_HEADERS STABLE_JSON) cmake_parse_arguments(SC "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) @@ -67,6 +69,13 @@ function(gen_str_catalog) if(SC_MODULE_ID_MAX) set(MODULE_ID_MAX_ARG --module_id_max ${SC_MODULE_ID_MAX}) endif() + if(SC_STABLE_TYPO_DISTANCE) + set(STABLE_TYPO_DISTANCE_ARG --stable_typo_distance + ${SC_STABLE_TYPO_DISTANCE}) + endif() + if(SC_TYPO_DETECT) + set(TYPO_DETECT_ARG --typo_detect ${SC_TYPO_DETECT}) + endif() if(NOT SC_GEN_STR_CATALOG) set(SC_GEN_STR_CATALOG ${GEN_STR_CATALOG}) endif() @@ -79,7 +88,8 @@ function(gen_str_catalog) --cpp_output ${SC_OUTPUT_CPP} --json_output ${SC_OUTPUT_JSON} --xml_output ${SC_OUTPUT_XML} --stable_json ${STABLE_JSON} ${FORGET_ARG} ${CLIENT_NAME_ARG} ${VERSION_ARG} ${GUID_ID_ARG} - ${GUID_MASK_ARG} ${MODULE_ID_MAX_ARG} + ${GUID_MASK_ARG} ${MODULE_ID_MAX_ARG} ${STABLE_TYPO_DISTANCE_ARG} + ${TYPO_DETECT_ARG} DEPENDS ${UNDEFS} ${INPUT_JSON} ${SC_GEN_STR_CATALOG} ${STABLE_JSON} COMMAND_EXPAND_LISTS) diff --git a/test/log/CMakeLists.txt b/test/log/CMakeLists.txt index 191156b3..295cb08f 100644 --- a/test/log/CMakeLists.txt +++ b/test/log/CMakeLists.txt @@ -38,7 +38,11 @@ gen_str_catalog( GUID_ID "01234567-89ab-cdef-0123-456789abcdef" GUID_MASK - "ffffffff-ffff-ffff-ffff-ffffffffffff") + "ffffffff-ffff-ffff-ffff-ffffffffffff" + STABLE_TYPO_DISTANCE + 1 + TYPO_DETECT + fix_quiet) add_library(catalog_strings STATIC ${CMAKE_CURRENT_BINARY_DIR}/strings.cpp) target_link_libraries(catalog_strings PUBLIC cib) diff --git a/test/log/catalog1_lib.cpp b/test/log/catalog1_lib.cpp index 6447f2d0..80ae03f8 100644 --- a/test/log/catalog1_lib.cpp +++ b/test/log/catalog1_lib.cpp @@ -26,6 +26,7 @@ using log_env1 = stdx::make_env_t; } // namespace auto log_zero_args() -> void; +auto log_zero_args_typo() -> void; auto log_one_ct_arg() -> void; auto log_one_32bit_rt_arg() -> void; auto log_one_64bit_rt_arg() -> void; @@ -39,6 +40,12 @@ auto log_zero_args() -> void { stdx::ct_format<"A string with no placeholders">()); } +auto log_zero_args_typo() -> void { + auto cfg = logging::binary::config{test_log_args_destination{}}; + cfg.logger.log_msg( + stdx::ct_format<"A string with ni placeholders">()); +} + auto log_one_ct_arg() -> void { using namespace stdx::literals; auto cfg = logging::binary::config{test_log_args_destination{}}; diff --git a/test/log/catalog_app.cpp b/test/log/catalog_app.cpp index 105d9dc9..3b023849 100644 --- a/test/log/catalog_app.cpp +++ b/test/log/catalog_app.cpp @@ -11,6 +11,7 @@ template <> inline auto conc::injected_policy<> = test_conc_policy{}; extern int log_calls; extern std::uint32_t last_header; extern auto log_zero_args() -> void; +extern auto log_zero_args_typo() -> void; extern auto log_one_ct_arg() -> void; extern auto log_one_32bit_rt_arg() -> void; extern auto log_one_64bit_rt_arg() -> void; @@ -30,6 +31,16 @@ TEST_CASE("log zero arguments", "[catalog]") { CHECK(last_header == ((42u << 4u) | 1u)); } +TEST_CASE("log fixed string with typo", "[catalog]") { + test_critical_section::count = 0; + log_calls = 0; + log_zero_args_typo(); + CHECK(test_critical_section::count == 2); + CHECK(log_calls == 1); + // ID 42 is fixed by stable input + CHECK(last_header == ((42u << 4u) | 1u)); +} + TEST_CASE("log one compile-time argument", "[catalog]") { log_calls = 0; test_critical_section::count = 0; diff --git a/tools/gen_str_catalog.py b/tools/gen_str_catalog.py index 485e667c..86afffc3 100755 --- a/tools/gen_str_catalog.py +++ b/tools/gen_str_catalog.py @@ -58,11 +58,15 @@ def extract_string_id(line_m): module_re = re.compile(r"sc::module_string\s?>") -def module_string(module) -> str: +def module_string(module: str) -> str: string_tuple = module.replace("(char)", "") return "".join((chr(int(c)) for c in re.split(r"\s*,\s*", string_tuple))) +def msg_string(msg: dict) -> str: + return msg["msg"] + + def extract_module_id(line_m): return module_re.match(line_m.group(3)).group(1) @@ -85,10 +89,46 @@ def stable_msg_key(msg: dict): def stable_module_key(module: str): - return module_string(module) + return hash(module_string(module)) + + +def typo_error(s: str, stable: str, i: int) -> str: + raise Exception(f"Error: typo detected: \"{s}\" is similar to \"{stable}\"") + + +def typo_warn(s: str, stable: str, i: int) -> str: + print(f"Warning: typo detected: \"{s}\" is similar to \"{stable}\"") + return s + + +def typo_fix(s: str, stable: str, i: int) -> str: + print(f"Warning: typo detected: \"{s}\" is similar to \"{stable}\". Fixing to ID {i}.") + return stable + + +def typo_fix_quiet(s: str, stable: str, i: int) -> str: + return stable -def read_input(filenames: list[str], stable_ids): +typo_behavior = { + "error": typo_error, + "warn": typo_warn, + "fix": typo_fix, + "fix_quiet": typo_fix_quiet +} + + +def handle_typo(stable_ids: dict, s: str, d: int, fn, gen) -> str: + if d != 0: + from Levenshtein import distance + for (i, value) in stable_ids.values(): + if distance(s, value) <= d: + if fn(s, value, i) == value: + return i + return next(gen) + + +def read_input(filenames: list[str], stable_ids, typo_distance: int, typo_detect: str): line_re = re.compile(r"^.*(unsigned int (catalog|module)<(.+?)>\(\))$") def read_file(filename): @@ -103,24 +143,24 @@ def read_file(filename): strings = filter(lambda x: not isinstance(x, str), messages) modules = filter(lambda x: isinstance(x, str), messages) - def get_id(stable_ids, key_fn, gen, obj): + def get_id(stable_ids, key_fn, string_fn, gen, obj): key = key_fn(obj) if key in stable_ids: - return stable_ids[key] + return stable_ids[key][0] else: - return next(gen) + return handle_typo(stable_ids, string_fn(obj), typo_distance, typo_behavior[typo_detect], gen) stable_msg_ids, stable_module_ids = stable_ids old_msg_ids = set(stable_msg_ids.values()) msg_id_gen = itertools.filterfalse(old_msg_ids.__contains__, itertools.count(0)) - get_msg_id = partial(get_id, stable_msg_ids, stable_msg_key, msg_id_gen) + get_msg_id = partial(get_id, stable_msg_ids, stable_msg_key, msg_string, msg_id_gen) old_module_ids = set(stable_module_ids.values()) module_id_gen = itertools.filterfalse( old_module_ids.__contains__, itertools.count(0) ) - get_module_id = partial(get_id, stable_module_ids, stable_module_key, module_id_gen) + get_module_id = partial(get_id, stable_module_ids, stable_module_key, module_string, module_id_gen) unique_strings = {i[0][0]: i for i in strings}.values() return ( @@ -405,6 +445,19 @@ def parse_cmdline(): action="store_true", help="When on, stable IDs from a previous run are forgotten. By default, those strings are remembered in the output so that they will not be reused in future.", ) + parser.add_argument( + "--stable_typo_distance", + type=int, + default=0, + help="The Levenshtein distance used to detect typos in comparison to stable strings.", + ) + parser.add_argument( + "--typo_detect", + type=str, + choices=["error", "warn", "fix", "fix_quiet"], + default="error", + help="What to do when detecting a typo against stable strings.", + ) parser.add_argument( "--module_id_max", type=int, @@ -431,10 +484,10 @@ def main(): stable_catalog = read_stable(args.stable_json) try: stable_ids = ( - {stable_msg_key(msg): msg["id"] for msg in stable_catalog["messages"]}, - {m["string"]: m["id"] for m in stable_catalog["modules"]}, + {stable_msg_key(msg): (msg["id"], msg["msg"]) for msg in stable_catalog["messages"]}, + {hash(m["string"]): (m["id"], m["string"]) for m in stable_catalog["modules"]}, ) - modules, messages = read_input(args.input, stable_ids) + modules, messages = read_input(args.input, stable_ids, args.stable_typo_distance, args.typo_detect) except Exception as e: raise Exception(f"{str(e)} from file {args.input}") diff --git a/tools/requirements.txt b/tools/requirements.txt new file mode 100644 index 00000000..b94f2328 --- /dev/null +++ b/tools/requirements.txt @@ -0,0 +1,2 @@ +levenshtein==0.27.1 +libclang==18.1.1 From e17eb63e80d09ccadd3f8389be47cfcfbcdb7385 Mon Sep 17 00:00:00 2001 From: Ben Deane Date: Fri, 18 Apr 2025 10:48:52 -0600 Subject: [PATCH 2/2] :sparkles: Add `stable_only` policy to gen_str_catalog.py Problem: - If someone wants to be ultra-safe, every string should be in `stable_strings.json`. Solution: - Add a typo policy that requires every string from the code to be found in `stable_strings.json`. --- tools/gen_str_catalog.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/tools/gen_str_catalog.py b/tools/gen_str_catalog.py index 86afffc3..d4b775d4 100755 --- a/tools/gen_str_catalog.py +++ b/tools/gen_str_catalog.py @@ -118,12 +118,14 @@ def typo_fix_quiet(s: str, stable: str, i: int) -> str: } -def handle_typo(stable_ids: dict, s: str, d: int, fn, gen) -> str: +def handle_typo(stable_ids: dict, s: str, d: int, behavior: str, gen) -> str: + if behavior == "stable_only": + raise Exception(f"Error (using policy stable_only): \"{s}\" not found in stable strings.") if d != 0: from Levenshtein import distance for (i, value) in stable_ids.values(): if distance(s, value) <= d: - if fn(s, value, i) == value: + if typo_behavior[behavior](s, value, i) == value: return i return next(gen) @@ -148,7 +150,7 @@ def get_id(stable_ids, key_fn, string_fn, gen, obj): if key in stable_ids: return stable_ids[key][0] else: - return handle_typo(stable_ids, string_fn(obj), typo_distance, typo_behavior[typo_detect], gen) + return handle_typo(stable_ids, string_fn(obj), typo_distance, typo_detect, gen) stable_msg_ids, stable_module_ids = stable_ids @@ -454,9 +456,9 @@ def parse_cmdline(): parser.add_argument( "--typo_detect", type=str, - choices=["error", "warn", "fix", "fix_quiet"], + choices=["stable_only", "error", "warn", "fix", "fix_quiet"], default="error", - help="What to do when detecting a typo against stable strings.", + help="Policy to handle detecting a typo against stable strings.", ) parser.add_argument( "--module_id_max",