@@ -38,15 +38,15 @@ def _similar_keys(self, current_prefix, key, index, replace_chars):
3838 b_step = key [word_pos ].encode ('utf8' )
3939
4040 if b_step in replace_chars :
41- next_index = index
42- b_replace_char , u_replace_char = replace_chars [ b_step ]
41+ for ( b_replace_char , u_replace_char ) in replace_chars [ b_step ]:
42+ next_index = index
4343
44- next_index = self .dct .follow_bytes (b_replace_char , next_index )
44+ next_index = self .dct .follow_bytes (b_replace_char , next_index )
4545
46- if next_index is not None :
47- prefix = current_prefix + key [start_pos :word_pos ] + u_replace_char
48- extra_keys = self ._similar_keys (prefix , key , next_index , replace_chars )
49- res += extra_keys
46+ if next_index :
47+ prefix = current_prefix + key [start_pos :word_pos ] + u_replace_char
48+ extra_keys = self ._similar_keys (prefix , key , next_index , replace_chars )
49+ res += extra_keys
5050
5151 index = self .dct .follow_bytes (b_step , index )
5252 if index is None :
@@ -67,7 +67,7 @@ def similar_keys(self, key, replaces):
6767
6868 ``replaces`` is an object obtained from
6969 ``DAWG.compile_replaces(mapping)`` where mapping is a dict
70- that maps single-char unicode sitrings to another single-char
70+ that maps single-char unicode strings to (one or more) single-char
7171 unicode strings.
7272
7373 This may be useful e.g. for handling single-character umlauts.
@@ -77,14 +77,18 @@ def similar_keys(self, key, replaces):
@classmethod
def compile_replaces(cls, replaces):
    """Compile a replacement mapping into the form the ``similar_*`` lookups use.

    ``replaces`` maps a single-character string to either one
    single-character string or a non-empty list/tuple of single-character
    strings (several alternative replacements for the same character).

    Returns a dict that maps the UTF-8 encoded key to a list of
    ``(encoded_replacement, replacement)`` pairs, one pair per alternative.

    Raises ``ValueError`` when a key is not a single-character string or
    when a value is neither a single-character string nor a non-empty
    list/tuple made only of single-character strings.
    """
    value_error = "Values must be single-char unicode strings or non-empty lists of such."

    def is_single_char(candidate):
        # Anything exposing ``encode`` is treated as a string, so the check
        # does not depend on which concrete text type the caller passes.
        return hasattr(candidate, "encode") and len(candidate) == 1

    compiled = {}
    for k, v in replaces.items():
        if not is_single_char(k):
            raise ValueError("Keys must be single-char unicode strings.")

        # Normalise the value to a list of alternatives up front, so a bare
        # string is never iterated character by character by accident.
        if hasattr(v, "encode"):
            alternatives = [v]
        elif isinstance(v, (list, tuple)):
            alternatives = list(v)
        else:
            raise ValueError(value_error)

        if not alternatives or not all(is_single_char(entry) for entry in alternatives):
            raise ValueError(value_error)

        compiled[k.encode('utf8')] = [
            (entry.encode('utf8'), entry) for entry in alternatives
        ]

    return compiled
@@ -326,14 +330,15 @@ def _similar_items(self, current_prefix, key, index, replace_chars):
326330 b_step = key [word_pos ].encode ('utf8' )
327331
328332 if b_step in replace_chars :
329- next_index = index
330- b_replace_char , u_replace_char = replace_chars [ b_step ]
333+ for ( b_replace_char , u_replace_char ) in replace_chars [ b_step ]:
334+ next_index = index
331335
332- next_index = self .dct .follow_bytes (b_replace_char , next_index )
333- if next_index :
334- prefix = current_prefix + key [start_pos :word_pos ] + u_replace_char
335- extra_items = self ._similar_items (prefix , key , next_index , replace_chars )
336- res += extra_items
336+ next_index = self .dct .follow_bytes (b_replace_char , next_index )
337+
338+ if next_index :
339+ prefix = current_prefix + key [start_pos :word_pos ] + u_replace_char
340+ extra_items = self ._similar_items (prefix , key , next_index , replace_chars )
341+ res += extra_items
337342
338343 index = self .dct .follow_bytes (b_step , index )
339344 if not index :
@@ -356,7 +361,7 @@ def similar_items(self, key, replaces):
356361
357362 ``replaces`` is an object obtained from
358363 ``DAWG.compile_replaces(mapping)`` where mapping is a dict
359- that maps single-char unicode sitrings to another single-char
364+ that maps single-char unicode strings to (one or more) single-char
360365 unicode strings.
361366 """
362367 return self ._similar_items ("" , key , self .dct .ROOT , replaces )
@@ -370,13 +375,14 @@ def _similar_item_values(self, start_pos, key, index, replace_chars):
370375 b_step = key [word_pos ].encode ('utf8' )
371376
372377 if b_step in replace_chars :
373- next_index = index
374- b_replace_char , u_replace_char = replace_chars [b_step ]
378+ for (b_replace_char , u_replace_char ) in replace_chars [b_step ]:
379+ next_index = index
380+
381+ next_index = self .dct .follow_bytes (b_replace_char , next_index )
375382
376- next_index = self .dct .follow_bytes (b_replace_char , next_index )
377- if next_index :
378- extra_items = self ._similar_item_values (word_pos + 1 , key , next_index , replace_chars )
379- res += extra_items
383+ if next_index :
384+ extra_items = self ._similar_item_values (word_pos + 1 , key , next_index , replace_chars )
385+ res += extra_items
380386
381387 index = self .dct .follow_bytes (b_step , index )
382388 if not index :
@@ -398,7 +404,7 @@ def similar_item_values(self, key, replaces):
398404
399405 ``replaces`` is an object obtained from
400406 ``DAWG.compile_replaces(mapping)`` where mapping is a dict
401- that maps single-char unicode sitrings to another single-char
407+ that maps single-char unicode strings to (one or more) single-char
402408 unicode strings.
403409 """
404410 return self ._similar_item_values (0 , key , self .dct .ROOT , replaces )
0 commit comments