Skip to content

Commit 1d434aa

Browse files
committed
Avoid duplicate merged OMW synsets and lemmas
1 parent cbe715a commit 1d434aa

File tree

1 file changed

+7
-3
lines changed

1 file changed

+7
-3
lines changed

nltk/corpus/reader/wordnet.py

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -2247,11 +2247,15 @@ def custom_lemmas(self, tab_file, lang):
2247 2247   if len(pair) == 1 or pair[0] == lg:
2248 2248   if attr == "lemma":
2249 2249   val = val.strip().replace(" ", "_")
2250      - self._lang_data[lang][1][val.lower()].append(offset_pos)
     2250 + lang_offsets = self._lang_data[lang][1][val.lower()]
     2251 + if offset_pos not in lang_offsets:
     2252 + lang_offsets.append(offset_pos)
2251 2253   if attr in self.lg_attrs:
2252      - self._lang_data[lang][self.lg_attrs.index(attr)][
     2254 + lang_lemmas = self._lang_data[lang][self.lg_attrs.index(attr)][
2253 2255   offset_pos
2254      - ].append(val)
     2256 + ]
     2257 + if val not in lang_lemmas:
     2258 + lang_lemmas.append(val)
2255 2259
2256 2260   def disable_custom_lemmas(self, lang):
2257 2261   """prevent synsets from being mistakenly added"""

0 commit comments

Comments
 (0)