@@ -58,11 +58,15 @@ def extract_string_id(line_m):
# Matches the template text of a module-string type; group 1 captures
# everything between "sc::undefined<void, char, " and the closing ">" of that
# template (optionally followed by one whitespace character and a final ">").
module_re = re.compile(r"sc::module_string<sc::undefined<void, char, (.*)>\s?>")
5959
6060
def module_string(module: str) -> str:
    """Decode a comma-separated list of character codes into a string.

    Args:
        module: Text such as ``"(char)97, (char)98"``. Every ``(char)``
            marker is removed before the remaining integers are decoded.

    Returns:
        The string formed by converting each integer with ``chr``. An input
        with no character codes yields the empty string.

    Raises:
        ValueError: If an element is not a valid integer literal.
    """
    string_tuple = module.replace("(char)", "")
    # An empty list would otherwise split into [""] and fail in int("").
    if not string_tuple.strip():
        return ""
    return "".join(chr(int(c)) for c in re.split(r"\s*,\s*", string_tuple))
6464
6565
def msg_string(msg: dict) -> str:
    """Return the text stored under the ``"msg"`` key of a message record.

    Raises:
        KeyError: If the record has no ``"msg"`` entry.
    """
    return msg["msg"]
68+
69+
def extract_module_id(line_m):
    """Return the character-code list captured from a module declaration.

    Args:
        line_m: A regex match object whose group 3 holds the template
            argument text to be matched against ``module_re``.

    Returns:
        Group 1 of the ``module_re`` match.

    NOTE: ``module_re.match`` returns ``None`` when group 3 does not have the
    expected shape, in which case this raises ``AttributeError``.
    """
    return module_re.match(line_m.group(3)).group(1)
6872
@@ -85,10 +89,46 @@ def stable_msg_key(msg: dict):
8589
8690
def stable_module_key(module: str):
    """Return the lookup key used to find a module among the stable IDs.

    The key is the built-in ``hash`` of the decoded module string.

    NOTE(review): this has to produce the same value as the key used when
    the stable module table is built (``hash(m["string"])`` in ``main``);
    change both together.
    """
    return hash(module_string(module))
93+
94+
def typo_error(s: str, stable: str, i: int) -> str:
    """Typo handler that aborts by raising.

    Args:
        s: The newly seen string.
        stable: The stable string that ``s`` resembles.
        i: The ID of the stable string (unused here; part of the common
            handler signature).

    Raises:
        Exception: Always, naming both strings.
    """
    raise Exception(f"Error: typo detected: \"{s}\" is similar to \"{stable}\"")
97+
98+
def typo_warn(s: str, stable: str, i: int) -> str:
    """Typo handler that prints a warning and keeps the new string.

    Args:
        s: The newly seen string.
        stable: The stable string that ``s`` resembles.
        i: The ID of the stable string (unused here; part of the common
            handler signature).

    Returns:
        ``s`` unchanged.
    """
    print(f"Warning: typo detected: \"{s}\" is similar to \"{stable}\"")
    return s
102+
103+
def typo_fix(s: str, stable: str, i: int) -> str:
    """Typo handler that prints a warning and substitutes the stable string.

    Args:
        s: The newly seen string.
        stable: The stable string that ``s`` resembles.
        i: The ID of the stable string, reported in the warning.

    Returns:
        ``stable``.
    """
    print(
        f"Warning: typo detected: \"{s}\" is similar to \"{stable}\". Fixing to ID {i}."
    )
    return stable
107+
108+
def typo_fix_quiet(s: str, stable: str, i: int) -> str:
    """Typo handler that silently substitutes the stable string.

    Args:
        s: The newly seen string (unused).
        stable: The stable string that ``s`` resembles.
        i: The ID of the stable string (unused).

    Returns:
        ``stable``.
    """
    return stable
89111
90112
91- def read_input (filenames : list [str ], stable_ids ):
# Maps each accepted value of the --typo_detect option to its handler.
typo_behavior = {
    "error": typo_error,
    "warn": typo_warn,
    "fix": typo_fix,
    "fix_quiet": typo_fix_quiet,
}
119+
120+
def handle_typo(stable_ids: dict, s: str, d: int, fn, gen) -> int:
    """Pick an ID for a string that has no exact stable match.

    Args:
        stable_ids: Mapping whose values are ``(id, text)`` pairs.
        s: The newly seen string.
        d: Maximum edit distance at which ``s`` counts as a typo of a stable
            string. ``0`` disables the check.
        fn: Handler called as ``fn(s, stable_text, stable_id)`` for each
            stable string within distance ``d``. It may raise.
        gen: Iterator supplying fresh IDs.

    Returns:
        The stable ID of the first close string for which ``fn`` returns the
        stable text; otherwise the next value from ``gen``.
    """
    if d != 0:
        # Function-scope import: the third-party package is only needed when
        # typo detection is switched on.
        from Levenshtein import distance

        for i, value in stable_ids.values():
            if distance(s, value) <= d:
                # A handler signals "reuse the stable ID" by returning the
                # stable text; any other return value keeps searching.
                if fn(s, value, i) == value:
                    return i
    return next(gen)
129+
130+
131+ def read_input (filenames : list [str ], stable_ids , typo_distance : int , typo_detect : str ):
92132 line_re = re .compile (r"^.*(unsigned int (catalog|module)<(.+?)>\(\))$" )
93133
94134 def read_file (filename ):
@@ -103,24 +143,24 @@ def read_file(filename):
103143 strings = filter (lambda x : not isinstance (x , str ), messages )
104144 modules = filter (lambda x : isinstance (x , str ), messages )
105145
106- def get_id (stable_ids , key_fn , gen , obj ):
146+ def get_id (stable_ids , key_fn , string_fn , gen , obj ):
107147 key = key_fn (obj )
108148 if key in stable_ids :
109- return stable_ids [key ]
149+ return stable_ids [key ][ 0 ]
110150 else :
111- return next ( gen )
151+ return handle_typo ( stable_ids , string_fn ( obj ), typo_distance , typo_behavior [ typo_detect ], gen )
112152
113153 stable_msg_ids , stable_module_ids = stable_ids
114154
115155 old_msg_ids = set (stable_msg_ids .values ())
116156 msg_id_gen = itertools .filterfalse (old_msg_ids .__contains__ , itertools .count (0 ))
117- get_msg_id = partial (get_id , stable_msg_ids , stable_msg_key , msg_id_gen )
157+ get_msg_id = partial (get_id , stable_msg_ids , stable_msg_key , msg_string , msg_id_gen )
118158
119159 old_module_ids = set (stable_module_ids .values ())
120160 module_id_gen = itertools .filterfalse (
121161 old_module_ids .__contains__ , itertools .count (0 )
122162 )
123- get_module_id = partial (get_id , stable_module_ids , stable_module_key , module_id_gen )
163+ get_module_id = partial (get_id , stable_module_ids , stable_module_key , module_string , module_id_gen )
124164
125165 unique_strings = {i [0 ][0 ]: i for i in strings }.values ()
126166 return (
@@ -405,6 +445,19 @@ def parse_cmdline():
405445 action = "store_true" ,
406446 help = "When on, stable IDs from a previous run are forgotten. By default, those strings are remembered in the output so that they will not be reused in future." ,
407447 )
448+ parser .add_argument (
449+ "--stable_typo_distance" ,
450+ type = int ,
451+ default = 0 ,
452+ help = "The Levenshtein distance used to detect typos in comparison to stable strings." ,
453+ )
454+ parser .add_argument (
455+ "--typo_detect" ,
456+ type = str ,
457+ choices = ["error" , "warn" , "fix" , "fix_quiet" ],
458+ default = "error" ,
459+ help = "What to do when detecting a typo against stable strings." ,
460+ )
408461 parser .add_argument (
409462 "--module_id_max" ,
410463 type = int ,
@@ -431,10 +484,10 @@ def main():
431484 stable_catalog = read_stable (args .stable_json )
432485 try :
433486 stable_ids = (
434- {stable_msg_key (msg ): msg ["id" ] for msg in stable_catalog ["messages" ]},
435- {m ["string" ]: m ["id" ] for m in stable_catalog ["modules" ]},
487+ {stable_msg_key (msg ): ( msg ["id" ], msg [ "msg" ]) for msg in stable_catalog ["messages" ]},
488+ {hash ( m ["string" ]): ( m ["id" ], m [ "string" ]) for m in stable_catalog ["modules" ]},
436489 )
437- modules , messages = read_input (args .input , stable_ids )
490+ modules , messages = read_input (args .input , stable_ids , args . stable_typo_distance , args . typo_detect )
438491 except Exception as e :
439492 raise Exception (f"{ str (e )} from file { args .input } " )
440493
0 commit comments