@@ -1216,7 +1216,7 @@ def __init__(self, root, omw_reader):
12161216 self .map30 = self .map_wn ()
12171217
12181218 # Language data attributes
1219- self .lg_attrs = ["lemma" , "none " , "def" , "exe" ]
1219+ self .lg_attrs = ["lemma" , "of " , "def" , "exe" ]
12201220
12211221 def index_sense (self , version = None ):
12221222 """Read sense key to synset id mapping from index.sense file in corpus directory"""
@@ -1251,7 +1251,7 @@ def map_to_many(self, version="wordnet"):
12511251 return synset_to_many
12521252
12531253 def map_to_one (self , version = "wordnet" ):
1254- self .nomap [version ] = []
1254+ self .nomap [version ] = set ()
12551255 self .splits [version ] = {}
12561256 synset_to_many = self .map_to_many (version )
12571257 synset_to_one = {}
@@ -1273,7 +1273,7 @@ def map_to_one(self, version="wordnet"):
12731273 # where only Lithuanian and Slovak use the "s" ss_type.
12741274 synset_to_one [f"{ source [:- 1 ]} a" ] = target
12751275 else :
1276- self .nomap [version ].append (source )
1276+ self .nomap [version ].add (source )
12771277 return synset_to_one
12781278
12791279 def map_wn (self , version = "wordnet" ):
@@ -1294,7 +1294,9 @@ def merged_synsets(self, version="wordnet"):
12941294 for source , targets in self .map_to_many (version ).items ():
12951295 for target in targets :
12961296 merge [target ].add (source )
1297- self .merges [version ] = {s : t for s , t in merge .items () if len (t ) > 1 }
1297+ self .merges [version ] = {
1298+ trg : src for trg , src in merge .items () if len (src ) > 1
1299+ }
12981300 return self .merges [version ]
12991301
13001302 # Open Multilingual WordNet functions, contributed by
@@ -2230,8 +2232,9 @@ def custom_lemmas(self, tab_file, lang):
22302232 else :
22312233 # Some OMW offsets were never in Wordnet:
22322234 if (
2233- offset_pos not in self .nomap
2234- and offset_pos .replace ("a" , "s" ) not in self .nomap
2235+ offset_pos not in self .nomap ["wordnet" ]
2236+ and offset_pos .replace ("a" , "s" )
2237+ not in self .nomap ["wordnet" ]
22352238 ):
22362239 warnings .warn (
22372240 f"{ lang } : invalid offset { offset_pos } in '{ line } '"
@@ -2247,11 +2250,15 @@ def custom_lemmas(self, tab_file, lang):
22472250 if len (pair ) == 1 or pair [0 ] == lg :
22482251 if attr == "lemma" :
22492252 val = val .strip ().replace (" " , "_" )
2250- self ._lang_data [lang ][1 ][val .lower ()].append (offset_pos )
2253+ lang_offsets = self ._lang_data [lang ][1 ][val .lower ()]
2254+ if offset_pos not in lang_offsets :
2255+ lang_offsets .append (offset_pos )
22512256 if attr in self .lg_attrs :
2252- self ._lang_data [lang ][self .lg_attrs .index (attr )][
2257+ lang_lemmas = self ._lang_data [lang ][self .lg_attrs .index (attr )][
22532258 offset_pos
2254- ].append (val )
2259+ ]
2260+ if val not in lang_lemmas :
2261+ lang_lemmas .append (val )
22552262
22562263 def disable_custom_lemmas (self , lang ):
22572264 """prevent synsets from being mistakenly added"""
0 commit comments