1010from nltk .corpus import wordnet
1111
1212
13- def lesk (context_sentence , ambiguous_word , pos = None , synsets = None ):
13+ def lesk (context_sentence , ambiguous_word , pos = None , synsets = None , lang = "eng" ):
1414 """Return a synset for an ambiguous word in a context.
15-
1615 :param iter context_sentence: The context sentence where the ambiguous word
1716 occurs, passed as an iterable of words.
1817 :param str ambiguous_word: The ambiguous word that requires WSD.
1918 :param str pos: A specified Part-of-Speech (POS).
2019 :param iter synsets: Possible synsets of the ambiguous word.
20+ :param str lang: WordNet language.
2121 :return: ``lesk_sense`` The Synset() object with the highest signature overlaps.
22-
2322 This function is an implementation of the original Lesk algorithm (1986) [1].
24-
2523 Usage example::
26-
2724 >>> lesk(['I', 'went', 'to', 'the', 'bank', 'to', 'deposit', 'money', '.'], 'bank', 'n')
2825 Synset('savings_bank.n.02')
29-
3026 [1] Lesk, Michael. "Automatic sense disambiguation using machine
3127 readable dictionaries: how to tell a pine cone from an ice cream
3228 cone." Proceedings of the 5th Annual International Conference on
@@ -36,7 +32,7 @@ def lesk(context_sentence, ambiguous_word, pos=None, synsets=None):
3632
3733 context = set (context_sentence )
3834 if synsets is None :
39- synsets = wordnet .synsets (ambiguous_word )
35+ synsets = wordnet .synsets (ambiguous_word , lang = lang )
4036
4137 if pos :
4238 synsets = [ss for ss in synsets if str (ss .pos ()) == pos ]
0 commit comments