@@ -1208,11 +1208,12 @@ def __init__(self, root, omw_reader):
12081208 # load the exception file data into memory
12091209 self ._load_exception_map ()
12101210
1211- self .nomap = []
1211+ self .nomap = {}
12121212 self .splits = {}
1213+ self .merges = {}
12131214
12141215 # map from WordNet 3.0 for OMW data
1215- self .map30 = self .map_wn30 ()
1216+ self .map30 = self .map_wn ()
12161217
12171218 # Language data attributes
12181219 self .lg_attrs = ["lemma" , "none" , "def" , "exe" ]
@@ -1235,8 +1236,8 @@ def index_sense(self, version=None):
12351236 sensekey_map [sensekey ] = f"{ fields [1 ]} -{ pos } "
12361237 return sensekey_map
12371238
1238- def map_to_many (self ):
1239- sensekey_map1 = self .index_sense ("wordnet" )
1239+ def map_to_many (self , version = "wordnet" ):
1240+ sensekey_map1 = self .index_sense (version )
12401241 sensekey_map2 = self .index_sense ()
12411242 synset_to_many = {}
12421243 for synsetid in set (sensekey_map1 .values ()):
@@ -1249,8 +1250,10 @@ def map_to_many(self):
12491250 synset_to_many [source ].append (target )
12501251 return synset_to_many
12511252
1252- def map_to_one (self ):
1253- synset_to_many = self .map_to_many ()
1253+ def map_to_one (self , version = "wordnet" ):
1254+ self .nomap [version ] = []
1255+ self .splits [version ] = {}
1256+ synset_to_many = self .map_to_many (version )
12541257 synset_to_one = {}
12551258 for source in synset_to_many :
12561259 candidates_bag = synset_to_many [source ]
@@ -1262,23 +1265,37 @@ def map_to_one(self):
12621265 counts = []
12631266 for candidate in candidates_set :
12641267 counts .append ((candidates_bag .count (candidate ), candidate ))
1265- self .splits [source ] = counts
1268+ self .splits [version ][ source ] = counts
12661269 target = max (counts )[1 ]
12671270 synset_to_one [source ] = target
12681271 if source [- 1 ] == "s" :
12691272 # Add a mapping from "a" to target for applications like omw,
12701273 # where only Lithuanian and Slovak use the "s" ss_type.
12711274 synset_to_one [f"{ source [:- 1 ]} a" ] = target
12721275 else :
1273- self .nomap .append (source )
1276+ self .nomap [ version ] .append (source )
12741277 return synset_to_one
12751278
def map_wn(self, version="wordnet"):
    """Map synsets of Wordnet 'version' onto the currently loaded Wordnet.

    :param version: identifier of the source Wordnet; it is compared with
        ``self.get_version()`` and forwarded unchanged to ``map_to_one``.
    :return: ``None`` when ``self.get_version()`` equals ``version`` (no
        mapping is built), otherwise the result of
        ``self.map_to_one(version)``.
    """
    # NOTE(review): the default "wordnet" is compared against the value of
    # get_version(); confirm both use the same identifier format.
    if self.get_version() != version:
        return self.map_to_one(version)
    return None
1285+
def split_synsets(self, version="wordnet"):
    """Return the split-synset table recorded for Wordnet 'version'.

    The table is read from ``self.splits[version]``. When no entry exists
    yet for ``version``, ``self.map_to_one(version)`` is run first, since
    it is the method that fills ``self.splits[version]``.

    :param version: identifier of the source Wordnet, forwarded unchanged
        to ``map_to_one``.
    :return: the value stored under ``self.splits[version]``.
    """
    if version not in self.splits:
        # Called only for its side effect of populating self.splits[version];
        # the mapping it returns is not needed here.
        self.map_to_one(version)
    return self.splits[version]
1290+
def merged_synsets(self, version="wordnet"):
    """Return the targets that several source synsets of 'version' map onto.

    The one-to-many table from ``self.map_to_many(version)`` is inverted,
    and only targets reached from more than one distinct source are kept.
    The result is cached in ``self.merges[version]``, so the table is
    computed once per ``version``.

    :param version: identifier of the source Wordnet, forwarded unchanged
        to ``map_to_many``.
    :return: dict mapping each such target to the set of its sources.
    """
    if version in self.merges:
        return self.merges[version]

    # Invert source -> [targets] into target -> {sources}.
    sources_by_target = defaultdict(set)
    for source, targets in self.map_to_many(version).items():
        for target in targets:
            sources_by_target[target].add(source)

    # A target counts as a merge only when two or more sources reach it.
    merged = {
        target: sources
        for target, sources in sources_by_target.items()
        if len(sources) > 1
    }
    self.merges[version] = merged
    return merged
12821299
12831300 # Open Multilingual WordNet functions, contributed by
12841301 # Nasruddin A’aidil Shari, Sim Wei Ying Geraldine, and Soe Lynn
0 commit comments