Skip to content

Commit e21c5e2

Browse files
committed
✨ Add typo detection/fixing for string generation
Problem: - Stable strings can be fixed not just to be consistent from one build to the next, but because hardware expects certain string IDs or ranges. Accidentally having a string in code with a typo could result in another string ID being generated, with unfortunate results. Solution: - Allow typos to be detected and fixed.
1 parent e934914 commit e21c5e2

File tree

8 files changed

+129
-21
lines changed

8 files changed

+129
-21
lines changed

.github/workflows/unit_tests.yml

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -126,13 +126,13 @@ jobs:
126126
- name: Install build tools
127127
run: |
128128
${{ matrix.install }}
129-
sudo apt install -y ninja-build
129+
sudo apt install -y ninja-build python3-venv python3-pip
130130
131-
- name: Install libclang for string catalog
131+
- name: Install python requirements for string catalog
132132
run: |
133133
python3 -m venv ${{github.workspace}}/test_venv
134134
source ${{github.workspace}}/test_venv/bin/activate
135-
pip install libclang
135+
pip install -r ${{github.workspace}}/tools/requirements.txt
136136
echo "${{github.workspace}}/test_venv/bin" >> $GITHUB_PATH
137137
138138
- name: Restore CPM cache
@@ -212,7 +212,14 @@ jobs:
212212
- name: Install build tools
213213
run: |
214214
${{ matrix.install }}
215-
sudo apt install -y ninja-build
215+
sudo apt install -y ninja-build python3-venv python3-pip
216+
217+
- name: Install python requirements for string catalog
218+
run: |
219+
python3 -m venv ${{github.workspace}}/test_venv
220+
source ${{github.workspace}}/test_venv/bin/activate
221+
pip install -r ${{github.workspace}}/tools/requirements.txt
222+
echo "${{github.workspace}}/test_venv/bin" >> $GITHUB_PATH
216223
217224
- name: Restore CPM cache
218225
env:
@@ -332,7 +339,14 @@ jobs:
332339
- name: Install build tools
333340
run: |
334341
${{ matrix.install }}
335-
sudo apt install -y ninja-build
342+
sudo apt install -y ninja-build python3-venv python3-pip
343+
344+
- name: Install python requirements for string catalog
345+
run: |
346+
python3 -m venv ${{github.workspace}}/test_venv
347+
source ${{github.workspace}}/test_venv/bin/activate
348+
pip install -r ${{github.workspace}}/tools/requirements.txt
349+
echo "${{github.workspace}}/test_venv/bin" >> $GITHUB_PATH
336350
337351
- name: Restore CPM cache
338352
env:
@@ -378,7 +392,14 @@ jobs:
378392

379393
- name: Install build tools
380394
run: |
381-
sudo apt update && sudo apt install -y gcc-${{env.DEFAULT_GCC_VERSION}} g++-${{env.DEFAULT_GCC_VERSION}} ninja-build valgrind
395+
sudo apt update && sudo apt install -y gcc-${{env.DEFAULT_GCC_VERSION}} g++-${{env.DEFAULT_GCC_VERSION}} ninja-build python3-venv python3-pip valgrind
396+
397+
- name: Install python requirements for string catalog
398+
run: |
399+
python3 -m venv ${{github.workspace}}/test_venv
400+
source ${{github.workspace}}/test_venv/bin/activate
401+
pip install -r ${{github.workspace}}/tools/requirements.txt
402+
echo "${{github.workspace}}/test_venv/bin" >> $GITHUB_PATH
382403
383404
- name: Restore CPM cache
384405
env:

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,5 +11,5 @@
1111
CMakePresets.json
1212
/toolchains
1313
mull.yml
14-
requirements.txt
14+
/requirements.txt
1515
docs/puppeteer_config.json

cmake/string_catalog.cmake

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,9 @@ function(gen_str_catalog)
1010
VERSION
1111
GUID_ID
1212
GUID_MASK
13-
MODULE_ID_MAX)
13+
MODULE_ID_MAX
14+
STABLE_TYPO_DISTANCE
15+
TYPO_DETECT)
1416
set(multiValueArgs INPUT_JSON INPUT_LIBS INPUT_HEADERS STABLE_JSON)
1517
cmake_parse_arguments(SC "${options}" "${oneValueArgs}" "${multiValueArgs}"
1618
${ARGN})
@@ -67,6 +69,13 @@ function(gen_str_catalog)
6769
if(SC_MODULE_ID_MAX)
6870
set(MODULE_ID_MAX_ARG --module_id_max ${SC_MODULE_ID_MAX})
6971
endif()
72+
if(SC_STABLE_TYPO_DISTANCE)
73+
set(STABLE_TYPO_DISTANCE_ARG --stable_typo_distance
74+
${SC_STABLE_TYPO_DISTANCE})
75+
endif()
76+
if(SC_TYPO_DETECT)
77+
set(TYPO_DETECT_ARG --typo_detect ${SC_TYPO_DETECT})
78+
endif()
7079
if(NOT SC_GEN_STR_CATALOG)
7180
set(SC_GEN_STR_CATALOG ${GEN_STR_CATALOG})
7281
endif()
@@ -79,7 +88,8 @@ function(gen_str_catalog)
7988
--cpp_output ${SC_OUTPUT_CPP} --json_output ${SC_OUTPUT_JSON}
8089
--xml_output ${SC_OUTPUT_XML} --stable_json ${STABLE_JSON}
8190
${FORGET_ARG} ${CLIENT_NAME_ARG} ${VERSION_ARG} ${GUID_ID_ARG}
82-
${GUID_MASK_ARG} ${MODULE_ID_MAX_ARG}
91+
${GUID_MASK_ARG} ${MODULE_ID_MAX_ARG} ${STABLE_TYPO_DISTANCE_ARG}
92+
${TYPO_DETECT_ARG}
8393
DEPENDS ${UNDEFS} ${INPUT_JSON} ${SC_GEN_STR_CATALOG} ${STABLE_JSON}
8494
COMMAND_EXPAND_LISTS)
8595

test/log/CMakeLists.txt

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,11 @@ gen_str_catalog(
3838
GUID_ID
3939
"01234567-89ab-cdef-0123-456789abcdef"
4040
GUID_MASK
41-
"ffffffff-ffff-ffff-ffff-ffffffffffff")
41+
"ffffffff-ffff-ffff-ffff-ffffffffffff"
42+
STABLE_TYPO_DISTANCE
43+
1
44+
TYPO_DETECT
45+
fix_quiet)
4246

4347
add_library(catalog_strings STATIC ${CMAKE_CURRENT_BINARY_DIR}/strings.cpp)
4448
target_link_libraries(catalog_strings PUBLIC cib)

test/log/catalog1_lib.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ using log_env1 = stdx::make_env_t<logging::get_level, logging::level::TRACE>;
2626
} // namespace
2727

2828
auto log_zero_args() -> void;
29+
auto log_zero_args_typo() -> void;
2930
auto log_one_ct_arg() -> void;
3031
auto log_one_32bit_rt_arg() -> void;
3132
auto log_one_64bit_rt_arg() -> void;
@@ -39,6 +40,12 @@ auto log_zero_args() -> void {
3940
stdx::ct_format<"A string with no placeholders">());
4041
}
4142

43+
auto log_zero_args_typo() -> void {
44+
auto cfg = logging::binary::config{test_log_args_destination{}};
45+
cfg.logger.log_msg<log_env1>(
46+
stdx::ct_format<"A string with ni placeholders">());
47+
}
48+
4249
auto log_one_ct_arg() -> void {
4350
using namespace stdx::literals;
4451
auto cfg = logging::binary::config{test_log_args_destination{}};

test/log/catalog_app.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ template <> inline auto conc::injected_policy<> = test_conc_policy{};
1111
extern int log_calls;
1212
extern std::uint32_t last_header;
1313
extern auto log_zero_args() -> void;
14+
extern auto log_zero_args_typo() -> void;
1415
extern auto log_one_ct_arg() -> void;
1516
extern auto log_one_32bit_rt_arg() -> void;
1617
extern auto log_one_64bit_rt_arg() -> void;
@@ -30,6 +31,16 @@ TEST_CASE("log zero arguments", "[catalog]") {
3031
CHECK(last_header == ((42u << 4u) | 1u));
3132
}
3233

34+
TEST_CASE("log fixed string with typo", "[catalog]") {
35+
test_critical_section::count = 0;
36+
log_calls = 0;
37+
log_zero_args_typo();
38+
CHECK(test_critical_section::count == 2);
39+
CHECK(log_calls == 1);
40+
// ID 42 is fixed by stable input
41+
CHECK(last_header == ((42u << 4u) | 1u));
42+
}
43+
3344
TEST_CASE("log one compile-time argument", "[catalog]") {
3445
log_calls = 0;
3546
test_critical_section::count = 0;

tools/gen_str_catalog.py

Lines changed: 64 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -58,11 +58,15 @@ def extract_string_id(line_m):
5858
module_re = re.compile(r"sc::module_string<sc::undefined<void, char, (.*)>\s?>")
5959

6060

61-
def module_string(module) -> str:
61+
def module_string(module: str) -> str:
6262
string_tuple = module.replace("(char)", "")
6363
return "".join((chr(int(c)) for c in re.split(r"\s*,\s*", string_tuple)))
6464

6565

66+
def msg_string(msg: dict) -> str:
67+
return msg["msg"]
68+
69+
6670
def extract_module_id(line_m):
6771
return module_re.match(line_m.group(3)).group(1)
6872

@@ -85,10 +89,46 @@ def stable_msg_key(msg: dict):
8589

8690

8791
def stable_module_key(module: str):
88-
return module_string(module)
92+
return hash(module_string(module))
93+
94+
95+
def typo_error(s: str, stable: str, i: int) -> str:
96+
raise Exception(f"Error: typo detected: \"{s}\" is similar to \"{stable}\"")
97+
98+
99+
def typo_warn(s: str, stable: str, i: int) -> str:
100+
print(f"Warning: typo detected: \"{s}\" is similar to \"{stable}\"")
101+
return s
102+
103+
104+
def typo_fix(s: str, stable: str, i: int) -> str:
105+
print(f"Warning: typo detected: \"{s}\" is similar to \"{stable}\". Fixing to ID {i}.")
106+
return stable
107+
108+
109+
def typo_fix_quiet(s: str, stable: str, i: int) -> str:
110+
return stable
89111

90112

91-
def read_input(filenames: list[str], stable_ids):
113+
typo_behavior = {
114+
"error": typo_error,
115+
"warn": typo_warn,
116+
"fix": typo_fix,
117+
"fix_quiet": typo_fix_quiet
118+
}
119+
120+
121+
def handle_typo(stable_ids: dict, s: str, d: int, fn, gen) -> str:
122+
if d != 0:
123+
from Levenshtein import distance
124+
for (i, value) in stable_ids.values():
125+
if distance(s, value) <= d:
126+
if fn(s, value, i) == value:
127+
return i
128+
return next(gen)
129+
130+
131+
def read_input(filenames: list[str], stable_ids, typo_distance: int, typo_detect: str):
92132
line_re = re.compile(r"^.*(unsigned int (catalog|module)<(.+?)>\(\))$")
93133

94134
def read_file(filename):
@@ -103,24 +143,24 @@ def read_file(filename):
103143
strings = filter(lambda x: not isinstance(x, str), messages)
104144
modules = filter(lambda x: isinstance(x, str), messages)
105145

106-
def get_id(stable_ids, key_fn, gen, obj):
146+
def get_id(stable_ids, key_fn, string_fn, gen, obj):
107147
key = key_fn(obj)
108148
if key in stable_ids:
109-
return stable_ids[key]
149+
return stable_ids[key][0]
110150
else:
111-
return next(gen)
151+
return handle_typo(stable_ids, string_fn(obj), typo_distance, typo_behavior[typo_detect], gen)
112152

113153
stable_msg_ids, stable_module_ids = stable_ids
114154

115155
# Stable entries are (id, msg) tuples; collect only the ids so the integer
# generator built from this set actually skips already-reserved message IDs.
old_msg_ids = {stable_id for stable_id, _ in stable_msg_ids.values()}
116156
msg_id_gen = itertools.filterfalse(old_msg_ids.__contains__, itertools.count(0))
117-
get_msg_id = partial(get_id, stable_msg_ids, stable_msg_key, msg_id_gen)
157+
get_msg_id = partial(get_id, stable_msg_ids, stable_msg_key, msg_string, msg_id_gen)
118158

119159
# Stable entries are (id, string) tuples; collect only the ids so the integer
# generator built from this set actually skips already-reserved module IDs.
old_module_ids = {stable_id for stable_id, _ in stable_module_ids.values()}
120160
module_id_gen = itertools.filterfalse(
121161
old_module_ids.__contains__, itertools.count(0)
122162
)
123-
get_module_id = partial(get_id, stable_module_ids, stable_module_key, module_id_gen)
163+
get_module_id = partial(get_id, stable_module_ids, stable_module_key, module_string, module_id_gen)
124164

125165
unique_strings = {i[0][0]: i for i in strings}.values()
126166
return (
@@ -405,6 +445,19 @@ def parse_cmdline():
405445
action="store_true",
406446
help="When on, stable IDs from a previous run are forgotten. By default, those strings are remembered in the output so that they will not be reused in future.",
407447
)
448+
parser.add_argument(
449+
"--stable_typo_distance",
450+
type=int,
451+
default=0,
452+
help="The Levenshtein distance used to detect typos in comparison to stable strings.",
453+
)
454+
parser.add_argument(
455+
"--typo_detect",
456+
type=str,
457+
choices=["error", "warn", "fix", "fix_quiet"],
458+
default="error",
459+
help="What to do when detecting a typo against stable strings.",
460+
)
408461
parser.add_argument(
409462
"--module_id_max",
410463
type=int,
@@ -431,10 +484,10 @@ def main():
431484
stable_catalog = read_stable(args.stable_json)
432485
try:
433486
stable_ids = (
434-
{stable_msg_key(msg): msg["id"] for msg in stable_catalog["messages"]},
435-
{m["string"]: m["id"] for m in stable_catalog["modules"]},
487+
{stable_msg_key(msg): (msg["id"], msg["msg"]) for msg in stable_catalog["messages"]},
488+
{hash(m["string"]): (m["id"], m["string"]) for m in stable_catalog["modules"]},
436489
)
437-
modules, messages = read_input(args.input, stable_ids)
490+
modules, messages = read_input(args.input, stable_ids, args.stable_typo_distance, args.typo_detect)
438491
except Exception as e:
439492
raise Exception(f"{str(e)} from file {args.input}")
440493

tools/requirements.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
levenshtein==0.27.1
2+
libclang==18.1.1

0 commit comments

Comments
 (0)