Skip to content

Commit c28cee7

Browse files
committed
Duplicate only real adjective satellites
1 parent 75a63e5 commit c28cee7

File tree

1 file changed: 10 additions and 2 deletions.

1 file changed: 10 additions and 2 deletions.

nltk/corpus/reader/wordnet.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1380,6 +1380,7 @@ def langs(self):
13801380
return list(self.provenances.keys())
13811381

13821382
def _load_lemma_pos_offset_map(self):
1383+
adj_data_file = self._data_file(ADJ)
13831384
for suffix in self._FILEMAP.values():
13841385
# parse each line of the file (ignoring comment lines)
13851386
with self.open("index.%s" % suffix) as fp:
@@ -1425,8 +1426,15 @@ def _next_token():
14251426
# map lemmas and parts of speech to synsets
14261427
self._lemma_pos_offset_map[lemma][pos] = synset_offsets
14271428
if pos == ADJ:
1428-
# Duplicate all adjectives indiscriminately?:
1429-
self._lemma_pos_offset_map[lemma][ADJ_SAT] = synset_offsets
1429+
sat_offsets = []
1430+
for offset in synset_offsets:
1431+
adj_data_file.seek(offset)
1432+
# Check in data.adj if offset pos is ADJ_SAT
1433+
if adj_data_file.readline()[12:13] == ADJ_SAT:
1434+
sat_offsets.append(offset)
1435+
if sat_offsets:
1436+
# Duplicate only real satellites
1437+
self._lemma_pos_offset_map[lemma][ADJ_SAT] = sat_offsets
14301438

14311439
def _load_exception_map(self):
14321440
# load the exception file data into memory

0 commit comments

Comments
 (0)