@@ -58,11 +58,15 @@ def extract_string_id(line_m):
58
58
# Matches the text `sc::module_string<sc::undefined<void, char, ...>>` (with an
# optional whitespace character before the final `>`) and captures everything
# after `char, ` in group 1.
module_re = re.compile(r"sc::module_string<sc::undefined<void, char, (.*)>\s?>")
59
59
60
60
61
def module_string(module: str) -> str:
    """Decode a comma-separated list of character codes into a string.

    Every ``(char)`` cast is removed from ``module`` and each remaining
    comma-separated integer is converted with ``chr``; for example
    ``"(char)97, (char)98"`` becomes ``"ab"``.

    Args:
        module: The character-list text of a module declaration.

    Returns:
        The decoded string, or ``""`` when no character codes are present.

    Raises:
        ValueError: If an element of the list is not an integer.
    """
    string_tuple = module.replace("(char)", "").strip()
    # An empty list would otherwise split into [""] and fail in int("").
    if not string_tuple:
        return ""
    return "".join(chr(int(c)) for c in re.split(r"\s*,\s*", string_tuple))
64
64
65
65
66
def msg_string(msg: dict) -> str:
    """Return the text stored under the ``"msg"`` key of a message record."""
    text = msg["msg"]
    return text
68
+
69
+
66
70
def extract_module_id(line_m):
    """Return the character-list text of a module declaration.

    Args:
        line_m: A match object whose group 3 holds the template-argument
            text of the declaration.

    Returns:
        Group 1 of ``module_re`` applied to that text.

    Raises:
        ValueError: If the text does not match ``module_re``. Previously this
            surfaced as an ``AttributeError`` on ``None``.
    """
    template_args = line_m.group(3)
    module_m = module_re.match(template_args)
    if module_m is None:
        raise ValueError(f"Unrecognized module declaration: {template_args!r}")
    return module_m.group(1)
68
72
@@ -85,10 +89,46 @@ def stable_msg_key(msg: dict):
85
89
86
90
87
91
def stable_module_key(module: str):
    """Return the lookup key for a module: the hash of its decoded string.

    The stable-module table is keyed by ``hash(m["string"])``, so the same
    hashing is applied to the decoded module text here.
    """
    decoded = module_string(module)
    return hash(decoded)
93
+
94
+
95
def typo_error(s: str, stable: str, i: int) -> str:
    """Abort because ``s`` looks like a typo of the stable string ``stable``.

    Args:
        s: The newly seen string.
        stable: The similar string from the stable catalog.
        i: The ID of ``stable`` (unused; kept so all handlers share one
            signature).

    Raises:
        Exception: Always. The function never returns.
    """
    # Quote the strings exactly: padding inside the quotes would misreport
    # the very characters being compared.
    raise Exception(f'Error: typo detected: "{s}" is similar to "{stable}"')
97
+
98
+
99
def typo_warn(s: str, stable: str, i: int) -> str:
    """Print a warning that ``s`` resembles ``stable`` and keep ``s``.

    Args:
        s: The newly seen string.
        stable: The similar string from the stable catalog.
        i: The ID of ``stable`` (unused; kept so all handlers share one
            signature).

    Returns:
        ``s`` unchanged.
    """
    # Quote the strings exactly: padding inside the quotes would misreport
    # the very characters being compared.
    print(f'Warning: typo detected: "{s}" is similar to "{stable}"')
    return s
102
+
103
+
104
def typo_fix(s: str, stable: str, i: int) -> str:
    """Print a warning that ``s`` resembles ``stable`` and substitute it.

    Args:
        s: The newly seen string.
        stable: The similar string from the stable catalog.
        i: The ID of ``stable``, reported in the warning.

    Returns:
        ``stable``, the replacement for ``s``.
    """
    # Quote the strings exactly: padding inside the quotes would misreport
    # the very characters being compared.
    print(
        f'Warning: typo detected: "{s}" is similar to "{stable}". '
        f"Fixing to ID {i}."
    )
    return stable
107
+
108
+
109
def typo_fix_quiet(s: str, stable: str, i: int) -> str:
    """Substitute the stable string for ``s`` without printing anything.

    ``s`` and ``i`` are accepted only so that every typo handler shares the
    same signature.
    """
    replacement = stable
    return replacement
89
111
90
112
91
- def read_input (filenames : list [str ], stable_ids ):
113
# Dispatch table from a typo-handling mode name to the handler implementing it.
typo_behavior = dict(
    error=typo_error,
    warn=typo_warn,
    fix=typo_fix,
    fix_quiet=typo_fix_quiet,
)
119
+
120
+
121
def handle_typo(stable_ids: dict, s: str, d: int, fn, gen) -> int:
    """Return an ID for ``s``, reusing a stable ID if ``s`` is a near match.

    Args:
        stable_ids: Mapping whose values are ``(id, string)`` pairs.
        s: The string that had no exact entry in ``stable_ids``.
        d: Maximum Levenshtein distance that counts as a typo. Values of zero
            or less disable typo detection (and the ``Levenshtein`` import).
        fn: Typo handler called as ``fn(s, stable_string, stable_id)``. When
            it returns ``stable_string`` the stable ID is reused.
        gen: Iterator that supplies fresh IDs.

    Returns:
        The reused stable ID, or ``next(gen)`` when nothing was reused.

    Raises:
        Whatever ``fn`` raises.
    """
    if d > 0:
        # Imported lazily so the package is only needed when detection is on.
        from Levenshtein import distance

        near = [
            (dist, stable_id, stable_string)
            for stable_id, stable_string in stable_ids.values()
            if (dist := distance(s, stable_string)) <= d
        ]
        # Closest candidate first; sorted() is stable, so ties keep the
        # table's own order.
        near.sort(key=lambda candidate: candidate[0])
        for _, stable_id, stable_string in near:
            if fn(s, stable_string, stable_id) == stable_string:
                return stable_id
    return next(gen)
129
+
130
+
131
+ def read_input (filenames : list [str ], stable_ids , typo_distance : int , typo_detect : str ):
92
132
line_re = re .compile (r"^.*(unsigned int (catalog|module)<(.+?)>\(\))$" )
93
133
94
134
def read_file (filename ):
@@ -103,24 +143,24 @@ def read_file(filename):
103
143
strings = filter (lambda x : not isinstance (x , str ), messages )
104
144
modules = filter (lambda x : isinstance (x , str ), messages )
105
145
106
def get_id(stable_ids, key_fn, string_fn, gen, obj):
    """Look up the ID for ``obj``, deferring to typo handling on a miss.

    ``key_fn(obj)`` is the lookup key into ``stable_ids``, whose values are
    indexed with ``[0]`` to get the ID. On a miss, ``string_fn(obj)`` is
    passed to ``handle_typo`` together with the configured distance and
    handler.
    """
    lookup_key = key_fn(obj)
    if lookup_key not in stable_ids:
        return handle_typo(
            stable_ids,
            string_fn(obj),
            typo_distance,
            typo_behavior[typo_detect],
            gen,
        )
    return stable_ids[lookup_key][0]
112
152
113
153
stable_msg_ids, stable_module_ids = stable_ids

# The stable tables hold (id, string) pairs. Only the ID part may be used to
# exclude numbers from the fresh-ID generators: testing an int against a set
# of pairs never matches, so fresh IDs could collide with stable ones.
old_msg_ids = {stable_id for stable_id, _ in stable_msg_ids.values()}
msg_id_gen = itertools.filterfalse(old_msg_ids.__contains__, itertools.count(0))
get_msg_id = partial(get_id, stable_msg_ids, stable_msg_key, msg_string, msg_id_gen)

old_module_ids = {stable_id for stable_id, _ in stable_module_ids.values()}
module_id_gen = itertools.filterfalse(
    old_module_ids.__contains__, itertools.count(0)
)
get_module_id = partial(
    get_id, stable_module_ids, stable_module_key, module_string, module_id_gen
)
124
164
125
165
unique_strings = {i [0 ][0 ]: i for i in strings }.values ()
126
166
return (
@@ -405,6 +445,19 @@ def parse_cmdline():
405
445
action = "store_true" ,
406
446
help = "When on, stable IDs from a previous run are forgotten. By default, those strings are remembered in the output so that they will not be reused in future." ,
407
447
)
448
+ parser .add_argument (
449
+ "--stable_typo_distance" ,
450
+ type = int ,
451
+ default = 0 ,
452
+ help = "The Levenshtein distance used to detect typos in comparison to stable strings." ,
453
+ )
454
+ parser .add_argument (
455
+ "--typo_detect" ,
456
+ type = str ,
457
+ choices = ["error" , "warn" , "fix" , "fix_quiet" ],
458
+ default = "error" ,
459
+ help = "What to do when detecting a typo against stable strings." ,
460
+ )
408
461
parser .add_argument (
409
462
"--module_id_max" ,
410
463
type = int ,
@@ -431,10 +484,10 @@ def main():
431
484
stable_catalog = read_stable (args .stable_json )
432
485
try :
433
486
stable_ids = (
434
- {stable_msg_key (msg ): msg ["id" ] for msg in stable_catalog ["messages" ]},
435
- {m ["string" ]: m ["id" ] for m in stable_catalog ["modules" ]},
487
+ {stable_msg_key (msg ): ( msg ["id" ], msg [ "msg" ]) for msg in stable_catalog ["messages" ]},
488
+ {hash ( m ["string" ]): ( m ["id" ], m [ "string" ]) for m in stable_catalog ["modules" ]},
436
489
)
437
- modules , messages = read_input (args .input , stable_ids )
490
+ modules , messages = read_input (args .input , stable_ids , args . stable_typo_distance , args . typo_detect )
438
491
except Exception as e :
439
492
raise Exception (f"{ str (e )} from file { args .input } " )
440
493
0 commit comments