Skip to content

Commit fd397f8

Browse files
committed
Get split and merged synsets for any pair of English Wordnets
1 parent b82b4ee commit fd397f8

File tree

1 file changed

+20
-5
lines changed

1 file changed

+20
-5
lines changed

nltk/corpus/reader/wordnet.py

Lines changed: 20 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1210,9 +1210,10 @@ def __init__(self, root, omw_reader):
12101210

12111211
self.nomap = {}
12121212
self.splits = {}
1213+
self.merges = {}
12131214

12141215
# map from WordNet 3.0 for OMW data
1215-
self.map30 = self.map_wn30()
1216+
self.map30 = self.map_wn()
12161217

12171218
# Language data attributes
12181219
self.lg_attrs = ["lemma", "none", "def", "exe"]
@@ -1275,12 +1276,26 @@ def map_to_one(self, version="wordnet"):
12751276
self.nomap[version].append(source)
12761277
return synset_to_one
12771278

1278-
def map_wn30(self):
1279-
"""Mapping from Wordnet 3.0 to currently loaded Wordnet version"""
1280-
if self.get_version() == "3.0":
1279+
def map_wn(self, version="wordnet"):
1280+
"""Mapping from Wordnet 'version' to currently loaded Wordnet version"""
1281+
if self.get_version() == version:
12811282
return None
12821283
else:
1283-
return self.map_to_one()
1284+
return self.map_to_one(version)
1285+
1286+
def split_synsets(self, version="wordnet"):
1287+
if version not in self.splits:
1288+
_mymap = self.map_to_one(version)
1289+
return self.splits[version]
1290+
1291+
def merged_synsets(self, version="wordnet"):
1292+
if version not in self.merges:
1293+
merge = defaultdict(set)
1294+
for source, targets in self.map_to_many(version).items():
1295+
for target in targets:
1296+
merge[target].add(source)
1297+
self.merges[version] = {s: t for s, t in merge.items() if len(t) > 1}
1298+
return self.merges[version]
12841299

12851300
# Open Multilingual WordNet functions, contributed by
12861301
# Nasruddin A’aidil Shari, Sim Wei Ying Geraldine, and Soe Lynn

0 commit comments

Comments
 (0)