Skip to content

Commit 43bbed3

Browse files
authored
Merge pull request nltk#3118 from ekaf/wnmap
Allow mapping more WordNet versions
2 parents 929e5d7 + fd397f8 commit 43bbed3

File tree

1 file changed

+29
-12
lines changed

1 file changed

+29
-12
lines changed

nltk/corpus/reader/wordnet.py

Lines changed: 29 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -1208,11 +1208,12 @@ def __init__(self, root, omw_reader):
12081208
# load the exception file data into memory
12091209
self._load_exception_map()
12101210

1211-
self.nomap = []
1211+
self.nomap = {}
12121212
self.splits = {}
1213+
self.merges = {}
12131214

12141215
# map from WordNet 3.0 for OMW data
1215-
self.map30 = self.map_wn30()
1216+
self.map30 = self.map_wn()
12161217

12171218
# Language data attributes
12181219
self.lg_attrs = ["lemma", "none", "def", "exe"]
@@ -1235,8 +1236,8 @@ def index_sense(self, version=None):
12351236
sensekey_map[sensekey] = f"{fields[1]}-{pos}"
12361237
return sensekey_map
12371238

1238-
def map_to_many(self):
1239-
sensekey_map1 = self.index_sense("wordnet")
1239+
def map_to_many(self, version="wordnet"):
1240+
sensekey_map1 = self.index_sense(version)
12401241
sensekey_map2 = self.index_sense()
12411242
synset_to_many = {}
12421243
for synsetid in set(sensekey_map1.values()):
@@ -1249,8 +1250,10 @@ def map_to_many(self):
12491250
synset_to_many[source].append(target)
12501251
return synset_to_many
12511252

1252-
def map_to_one(self):
1253-
synset_to_many = self.map_to_many()
1253+
def map_to_one(self, version="wordnet"):
1254+
self.nomap[version] = []
1255+
self.splits[version] = {}
1256+
synset_to_many = self.map_to_many(version)
12541257
synset_to_one = {}
12551258
for source in synset_to_many:
12561259
candidates_bag = synset_to_many[source]
@@ -1262,23 +1265,37 @@ def map_to_one(self):
12621265
counts = []
12631266
for candidate in candidates_set:
12641267
counts.append((candidates_bag.count(candidate), candidate))
1265-
self.splits[source] = counts
1268+
self.splits[version][source] = counts
12661269
target = max(counts)[1]
12671270
synset_to_one[source] = target
12681271
if source[-1] == "s":
12691272
# Add a mapping from "a" to target for applications like omw,
12701273
# where only Lithuanian and Slovak use the "s" ss_type.
12711274
synset_to_one[f"{source[:-1]}a"] = target
12721275
else:
1273-
self.nomap.append(source)
1276+
self.nomap[version].append(source)
12741277
return synset_to_one
12751278

1276-
def map_wn30(self):
1277-
"""Mapping from Wordnet 3.0 to currently loaded Wordnet version"""
1278-
if self.get_version() == "3.0":
1279+
def map_wn(self, version="wordnet"):
1280+
"""Mapping from Wordnet 'version' to currently loaded Wordnet version"""
1281+
if self.get_version() == version:
12791282
return None
12801283
else:
1281-
return self.map_to_one()
1284+
return self.map_to_one(version)
1285+
1286+
def split_synsets(self, version="wordnet"):
1287+
if version not in self.splits:
1288+
_mymap = self.map_to_one(version)
1289+
return self.splits[version]
1290+
1291+
def merged_synsets(self, version="wordnet"):
1292+
if version not in self.merges:
1293+
merge = defaultdict(set)
1294+
for source, targets in self.map_to_many(version).items():
1295+
for target in targets:
1296+
merge[target].add(source)
1297+
self.merges[version] = {s: t for s, t in merge.items() if len(t) > 1}
1298+
return self.merges[version]
12821299

12831300
# Open Multilingual WordNet functions, contributed by
12841301
# Nasruddin A’aidil Shari, Sim Wei Ying Geraldine, and Soe Lynn

0 commit comments

Comments
 (0)